# Reviewer notes for this patch. Everything ahead of the first `diff --git`
# header is commentary and is not part of any hunk.
#
# NOTE(review): the line breaks of this patch have been collapsed (every hunk
# sits on one of two physical lines); the original newlines must be restored
# before `git apply` / `patch` can consume it.
#
# Files touched:
#
# 1. model/reward/instructor/configs/galactica-125m.yml (new file, 13 lines)
#    Config for facebook/galactica-125m: learning_rate 1e-5, gradient
#    checkpointing off, gradient_accumulation_steps 32, per-device train batch
#    size 2, warmup_steps 600, eval_steps 200, save_steps 500, max_length 512,
#    num_train_epochs 2, datasets webgpt + hfsummary.
#    The file is added without a trailing newline.
#
# 2. model/reward/instructor/configs/galactica-1b.yml (new file, 8 lines)
#    Config for facebook/galactica-1.3b: learning_rate 6e-6, gradient
#    checkpointing off, gradient_accumulation_steps 16, per-device train batch
#    size 4, warmup_steps 600, eval_steps 200, save_steps 500.
#    The file is added without a trailing newline.
#    NOTE(review): unlike the 125m config, this one sets no max_length,
#    num_train_epochs or datasets. trainer.py reads training_conf['max_length']
#    and utils.py casts params['num_train_epochs'], so these presumably come
#    from default_params -- confirm.
#
# 3. model/reward/instructor/trainer.py
#    - Hunk at line 109: gradient_accumulation_steps was being read from
#      training_conf['gradient_checkpointing'] (the on/off flag, `false` in both
#      new configs); it now reads training_conf['gradient_accumulation_steps'].
#    - Hunk at line 132: adds `train = ConcatDataset(train_datasets)` and passes
#      drop_token_type to DataCollatorForPairRank, true whenever the substring
#      'galactica' occurs in model_name.
#    NOTE(review): neither the import of ConcatDataset nor the definition of
#    model_name is visible in these hunks -- confirm both exist in trainer.py.
#
# 4. model/reward/instructor/utils.py
#    argument_parsing still merges default_params with the YAML config (config
#    values win), but now coerces gradient_accumulation_steps, num_train_epochs
#    and per_device_train_batch_size to int and learning_rate to float before
#    returning the dict.
#    NOTE(review): presumably needed because yaml.safe_load returns exponent
#    literals written without a decimal point (`1e-5`, `6e-6`) as str -- confirm.
#    The casts use plain subscripting, so a key absent from both default_params
#    and the config raises KeyError here.
#
diff --git a/model/reward/instructor/configs/galactica-125m.yml b/model/reward/instructor/configs/galactica-125m.yml new file mode 100644 index 00000000..55e093f5 --- /dev/null +++ b/model/reward/instructor/configs/galactica-125m.yml @@ -0,0 +1,13 @@ +model_name: facebook/galactica-125m +learning_rate: 1e-5 +gradient_checkpointing: false +gradient_accumulation_steps: 32 +per_device_train_batch_size: 2 +warmup_steps: 600 +eval_steps: 200 +save_steps: 500 +max_length: 512 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml new file mode 100644 index 00000000..48ad439b --- /dev/null +++ b/model/reward/instructor/configs/galactica-1b.yml @@ -0,0 +1,8 @@ +model_name: facebook/galactica-1.3b +learning_rate: 6e-6 +gradient_checkpointing: false +gradient_accumulation_steps: 16 +per_device_train_batch_size: 4 +warmup_steps: 600 +eval_steps: 200 +save_steps: 500 \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 06bb8098..dbdd91ba 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -109,7 +109,7 @@ if __name__ == "__main__": # half_precision_backend="apex", fp16=True, gradient_checkpointing=training_conf['gradient_checkpointing'], - gradient_accumulation_steps=training_conf['gradient_checkpointing'], + gradient_accumulation_steps=training_conf['gradient_accumulation_steps'], per_device_train_batch_size=training_conf['per_device_train_batch_size'], per_device_eval_batch_size=5, weight_decay=0.01, @@ -132,8 +132,8 @@ if __name__ == "__main__": sum_train, sum_eval = train_val_dataset(summary_dataset) train_datasets.append(sum_train) evals['hfsummary'] = sum_eval - - collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length']) + train = ConcatDataset(train_datasets) + collate_fn = 
DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name) trainer = RankTrainer( model, args, diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 4867087c..733e6ea7 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -74,6 +74,10 @@ def argument_parsing(parser): with open(args.config, 'r', encoding='utf-8') as f: training_conf = yaml.safe_load(f.read()) - return { **default_params, **training_conf } - + params = { **default_params, **training_conf } + params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps']) + params['num_train_epochs'] = int(params['num_train_epochs']) + params['per_device_train_batch_size'] = int(params['per_device_train_batch_size']) + params['learning_rate'] = float(params['learning_rate']) + return params