deactivate samples mixing by default

This commit is contained in:
Sotirios Anagnostidis
2023-02-11 10:33:25 +01:00
parent 631ba14b34
commit 44ed44e05d
2 changed files with 6 additions and 1 deletions
@@ -48,6 +48,7 @@ defaults:
poly_eps: 1.0
fuse_gelu: true
log_wandb: true
samples_mixing: false # uses a collator that mixes samples in the batch to create a single sample, possibly containing multiple tasks
galactica-125m:
learning_rate: 5e-5
+5 -1
View File
@@ -238,7 +238,11 @@ def get_dataset(conf, tokenizer):
train = ConcatDataset(train_datasets)
collate_fn = DialogueDataCollator(tokenizer, max_length=conf.max_length)
train_collate_fn = TrainDialogueDataCollator(tokenizer, max_length=conf.max_length)
train_collate_fn = (
TrainDialogueDataCollator(tokenizer, max_length=conf.max_length) if conf.samples_mixing else collate_fn
)
return train, evals, collate_fn, train_collate_fn