diff --git a/model/reward/instructor/configs/deberta-v2-xxlarge-a100.yaml b/model/reward/instructor/configs/deberta-v2-xxlarge-a100.yaml new file mode 100644 index 00000000..e9ec60c5 --- /dev/null +++ b/model/reward/instructor/configs/deberta-v2-xxlarge-a100.yaml @@ -0,0 +1,17 @@ +model_name: microsoft/deberta-v2-xxlarge +learning_rate: 2e-6 +scheduler: cosine +gradient_checkpointing: false +gradient_accumulation_steps: 12 +per_device_train_batch_size: 2 +per_device_eval_batch_size: 4 +warmup_steps: 600 +eval_steps: 1000000 +save_steps: 1000 +max_length: 400 +num_train_epochs: 3 +datasets: + - webgpt + - hfsummary + - anthropic_rlhf + - gptsynthetic diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index f3a337f6..94a256c2 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -103,6 +103,7 @@ def argument_parsing(parser): "gradient_accumulation_steps", "num_train_epochs", "save_steps", + "eval_steps", "per_device_train_batch_size", "per_device_eval_batch_size", ]: @@ -142,11 +143,3 @@ def get_datasets(dataset_list: List[AnyStr], tokenizer): evals["anthropic_rlhf"] = eval train = ConcatDataset(train_datasets) return train, evals - - -if __name__ == "__main__": - from transformers import AutoModelForSequenceClassification - - model = AutoModelForSequenceClassification.from_pretrained("bigscience/bloomz-560m") - freeze_top_n_layers(model, 10) - print(model.state_dict().keys())