From 2b7e97e30720f7a81f989f58749a18309adb0eb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9gory=20Ch=C3=A2tel?=
Date: Wed, 4 Jul 2018 13:49:50 +0200
Subject: [PATCH 1/2] Creating the model no matter what the number of GPU is.

---
 train.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/train.py b/train.py
index ff5acfb..dd76d01 100644
--- a/train.py
+++ b/train.py
@@ -263,8 +263,7 @@ if __name__ == '__main__':
     n_batch_train = args.n_batch * max(n_gpu, 1)
     n_updates_total = (n_train // n_batch_train) * args.n_iter
 
-    if n_gpu > 1:
-        dh_model = DoubleHeadModel(args, clf_token, vocab, n_ctx)
+    dh_model = DoubleHeadModel(args, clf_token, vocab, n_ctx)
 
     criterion = nn.CrossEntropyLoss(reduce=False)
     model_opt = OpenAIAdam(dh_model.parameters(),

From be407cdd37099324a0aa9fb7619cc1db329fd09d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gr=C3=A9gory=20Ch=C3=A2tel?=
Date: Wed, 4 Jul 2018 13:50:19 +0200
Subject: [PATCH 2/2] Solving missing variable issue.

---
 model_pytorch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/model_pytorch.py b/model_pytorch.py
index 279dcf8..d6b7dba 100644
--- a/model_pytorch.py
+++ b/model_pytorch.py
@@ -177,6 +177,7 @@ class LMHead(nn.Module):
     def __init__(self, model, cfg):
         super(LMHead, self).__init__()
         self.n_embd = cfg.n_embd
+        embed_shape = model.embed.weight.shape
         self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
         self.decoder.weight = model.embed.weight # Tied weights
 