From d0942a325630b82494c603d6c8977f3755b14983 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20O=C5=BC=C3=B3g?= <58388001+SzymonOzog@users.noreply.github.com>
Date: Sat, 7 Jan 2023 16:36:38 +0100
Subject: [PATCH 1/2] Added option to use cosine scheduler

Build the AdamW optimizer explicitly and, when the training config has a
`scheduler` key set to `linear` or `cosine`, create the matching warmup
schedule. Both objects are handed to RankTrainer through `optimizers`;
the scheduler stays None when the key is absent or has another value.

Also switch `report_to` from "local" to "wandb".
---
Note: the scheduler hunk was edited by hand after export (the linear
branch now receives the same arguments as the cosine branch), so the
post-image blob id on the `index` line below predates that edit.

 model/reward/instructor/trainer.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index f9266d70..68a58a38 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -11,6 +11,7 @@ from rank_datasets import DataCollatorForPairRank, HFSummary, RankGenCollator, W
 from torch import nn
 from torch.utils.data import ConcatDataset, Dataset
 from transformers import (
+    AdamW,
     AutoModelForSequenceClassification,
     DataCollator,
     EvalPrediction,
@@ -19,6 +20,8 @@ from transformers import (
     Trainer,
     TrainerCallback,
     TrainingArguments,
+    get_cosine_schedule_with_warmup,
+    get_linear_schedule_with_warmup,
 )
 from utils import argument_parsing, freeze_top_n_layers, get_tokenizer, train_val_dataset
 
@@ -179,7 +182,7 @@ if __name__ == "__main__":
         evaluation_strategy="steps",
         eval_steps=training_conf["eval_steps"],
         save_steps=1000,
-        report_to="local",
+        report_to="wandb",
     )
     train_datasets, evals = [], {}
     if "webgpt" in training_conf["datasets"]:
@@ -202,6 +205,29 @@ if __name__ == "__main__":
     else:
         collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf["max_length"])
     assert len(evals) > 0
+
+    optimizer = AdamW(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)
+    scheduler = None
+    if "scheduler" in training_conf:
+        # Optimizer updates over the whole run; true division yields a float, so convert to int.
+        num_training_steps = int(
+            len(train)
+            * args.num_train_epochs
+            / (args.per_device_train_batch_size * args.gradient_accumulation_steps)
+        )
+        if training_conf["scheduler"] == "linear":
+            scheduler = get_linear_schedule_with_warmup(
+                optimizer,
+                num_warmup_steps=args.warmup_steps,
+                num_training_steps=num_training_steps,
+            )
+        elif training_conf["scheduler"] == "cosine":
+            scheduler = get_cosine_schedule_with_warmup(
+                optimizer,
+                num_warmup_steps=args.warmup_steps,
+                num_training_steps=num_training_steps,
+            )
+
     trainer = RankTrainer(
         model=model,
         model_name=model_name,
@@ -211,6 +237,7 @@ if __name__ == "__main__":
         data_collator=collate_fn,
         tokenizer=tokenizer,
         compute_metrics=compute_metrics,
+        optimizers=(optimizer, scheduler),
     )
     # trainer.evaluate()
     trainer.train()

From f304921bd59ab4af5d664215533ac880770f0b44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20O=C5=BC=C3=B3g?= <58388001+SzymonOzog@users.noreply.github.com>
Date: Sat, 7 Jan 2023 16:36:55 +0100
Subject: [PATCH 2/2] Added deberta configs

---
 .../instructor/configs/deberta-v2-xlarge.yml      | 15 +++++++++++++++
 .../reward/instructor/configs/deberta-v3-base.yml | 14 ++++++++++++++
 .../configs/deberta-v3-large-squad2.yml           | 13 +++++++++++++
 .../instructor/configs/deberta-v3-large.yml       | 14 ++++++++++++++
 4 files changed, 56 insertions(+)
 create mode 100644 model/reward/instructor/configs/deberta-v2-xlarge.yml
 create mode 100644 model/reward/instructor/configs/deberta-v3-base.yml
 create mode 100644 model/reward/instructor/configs/deberta-v3-large-squad2.yml
 create mode 100644 model/reward/instructor/configs/deberta-v3-large.yml

diff --git a/model/reward/instructor/configs/deberta-v2-xlarge.yml b/model/reward/instructor/configs/deberta-v2-xlarge.yml
new file mode 100644
index 00000000..8bc4a182
--- /dev/null
+++ b/model/reward/instructor/configs/deberta-v2-xlarge.yml
@@ -0,0 +1,15 @@
+model_name: microsoft/deberta-v2-xlarge
+learning_rate: 1e-5
+freeze_layer: 15
+scheduler: cosine
+gradient_checkpointing: false
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 1
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
+max_length: 512
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
diff --git a/model/reward/instructor/configs/deberta-v3-base.yml b/model/reward/instructor/configs/deberta-v3-base.yml
new file mode 100644
index 00000000..7023709c
--- /dev/null
+++ b/model/reward/instructor/configs/deberta-v3-base.yml
@@ -0,0 +1,14 @@
+model_name: microsoft/deberta-v3-base
+learning_rate: 1e-5
+scheduler: cosine
+gradient_checkpointing: false
+gradient_accumulation_steps: 32
+per_device_train_batch_size: 2
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
+max_length: 512
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
diff --git a/model/reward/instructor/configs/deberta-v3-large-squad2.yml b/model/reward/instructor/configs/deberta-v3-large-squad2.yml
new file mode 100644
index 00000000..47275309
--- /dev/null
+++ b/model/reward/instructor/configs/deberta-v3-large-squad2.yml
@@ -0,0 +1,13 @@
+model_name: deepset/deberta-v3-large-squad2
+learning_rate: 1e-5
+gradient_checkpointing: false
+gradient_accumulation_steps: 32
+per_device_train_batch_size: 1
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
+max_length: 512
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
diff --git a/model/reward/instructor/configs/deberta-v3-large.yml b/model/reward/instructor/configs/deberta-v3-large.yml
new file mode 100644
index 00000000..0a910408
--- /dev/null
+++ b/model/reward/instructor/configs/deberta-v3-large.yml
@@ -0,0 +1,14 @@
+model_name: microsoft/deberta-v3-large
+learning_rate: 1e-5
+scheduler: cosine
+gradient_checkpointing: false
+gradient_accumulation_steps: 32
+per_device_train_batch_size: 1
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
+max_length: 512
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary