Merge pull request #15 from rodgzilla/master

Fixing issues #13 and #14.
This commit is contained in:
Thomas Wolf
2018-07-04 13:57:01 +02:00
committed by GitHub
2 changed files with 2 additions and 2 deletions
+1
View File
@@ -177,6 +177,7 @@ class LMHead(nn.Module):
def __init__(self, model, cfg):
super(LMHead, self).__init__()
self.n_embd = cfg.n_embd
embed_shape = model.embed.weight.shape
self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
self.decoder.weight = model.embed.weight # Tied weights
+1 -2
View File
@@ -263,8 +263,7 @@ if __name__ == '__main__':
n_batch_train = args.n_batch * max(n_gpu, 1)
n_updates_total = (n_train // n_batch_train) * args.n_iter
-if n_gpu > 1:
-dh_model = DoubleHeadModel(args, clf_token, vocab, n_ctx)
+dh_model = DoubleHeadModel(args, clf_token, vocab, n_ctx)
criterion = nn.CrossEntropyLoss(reduce=False)
model_opt = OpenAIAdam(dh_model.parameters(),