[feature] Add galactica training config

This commit is contained in:
theblackcat102
2023-01-01 01:25:53 +08:00
parent 24e06626f4
commit 918b7b7ec0
4 changed files with 30 additions and 5 deletions
@@ -0,0 +1,13 @@
# Training configuration for the facebook/galactica-125m checkpoint.
# NOTE(review): presumably read with yaml.safe_load and merged over the
# trainer's default parameters, so keys omitted here fall back to defaults —
# confirm against the argument-parsing helper.
model_name: facebook/galactica-125m
# NOTE(review): PyYAML follows YAML 1.1, where exponent notation without a
# decimal point (1e-5) is loaded as a string rather than a float. The loader
# must cast this value with float() before use — confirm it does.
learning_rate: 1e-5
gradient_checkpointing: false
# 32 accumulation steps x 2 samples per device = 64 samples per device
# per optimizer step (assuming standard gradient-accumulation semantics).
gradient_accumulation_steps: 32
per_device_train_batch_size: 2
# Step-based schedule: warmup length, evaluation interval, checkpoint interval.
warmup_steps: 600
eval_steps: 200
save_steps: 500
# Passed to the data collator as its max_length — presumably the maximum
# tokenized sequence length; verify against the collator.
max_length: 512
num_train_epochs: 2
# Names of the datasets to train on — presumably matched by name in the
# training script; verify the accepted values there.
datasets:
- webgpt
- hfsummary
@@ -0,0 +1,8 @@
# Training configuration for the facebook/galactica-1.3b checkpoint.
# Only a subset of keys is set here (no max_length, num_train_epochs or
# datasets). NOTE(review): presumably the missing keys come from the
# trainer's default parameters — confirm against the argument-parsing helper.
model_name: facebook/galactica-1.3b
# NOTE(review): PyYAML follows YAML 1.1, where exponent notation without a
# decimal point (6e-6) is loaded as a string rather than a float. The loader
# must cast this value with float() before use — confirm it does.
learning_rate: 6e-6
gradient_checkpointing: false
# 16 accumulation steps x 4 samples per device = 64 samples per device
# per optimizer step (assuming standard gradient-accumulation semantics).
gradient_accumulation_steps: 16
per_device_train_batch_size: 4
# Step-based schedule: warmup length, evaluation interval, checkpoint interval.
warmup_steps: 600
eval_steps: 200
save_steps: 500
+3 -3
View File
@@ -109,7 +109,7 @@ if __name__ == "__main__":
# half_precision_backend="apex",
fp16=True,
gradient_checkpointing=training_conf['gradient_checkpointing'],
gradient_accumulation_steps=training_conf['gradient_checkpointing'],
gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
per_device_train_batch_size=training_conf['per_device_train_batch_size'],
per_device_eval_batch_size=5,
weight_decay=0.01,
@@ -132,8 +132,8 @@ if __name__ == "__main__":
sum_train, sum_eval = train_val_dataset(summary_dataset)
train_datasets.append(sum_train)
evals['hfsummary'] = sum_eval
collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'])
train = ConcatDataset(train_datasets)
collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name)
trainer = RankTrainer(
model,
args,
+6 -2
View File
@@ -74,6 +74,10 @@ def argument_parsing(parser):
with open(args.config, 'r', encoding='utf-8') as f:
training_conf = yaml.safe_load(f.read())
return { **default_params, **training_conf }
params = { **default_params, **training_conf }
params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])
params['num_train_epochs'] = int(params['num_train_epochs'])
params['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])
params['learning_rate'] = float(params['learning_rate'])
return params