mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
[fix] Add working A100 config for deberta-xxlarge (DeepSpeed gets stuck during evaluation; possibly a deadlock)
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
model_name: microsoft/deberta-v2-xxlarge
|
||||
learning_rate: 2e-6
|
||||
scheduler: cosine
|
||||
gradient_checkpointing: false
|
||||
gradient_accumulation_steps: 12
|
||||
per_device_train_batch_size: 2
|
||||
per_device_eval_batch_size: 4
|
||||
warmup_steps: 600
|
||||
eval_steps: 1000000
|
||||
save_steps: 1000
|
||||
max_length: 400
|
||||
num_train_epochs: 3
|
||||
datasets:
|
||||
- webgpt
|
||||
- hfsummary
|
||||
- anthropic_rlhf
|
||||
- gptsynthetic
|
||||
@@ -103,6 +103,7 @@ def argument_parsing(parser):
|
||||
"gradient_accumulation_steps",
|
||||
"num_train_epochs",
|
||||
"save_steps",
|
||||
"eval_steps",
|
||||
"per_device_train_batch_size",
|
||||
"per_device_eval_batch_size",
|
||||
]:
|
||||
@@ -142,11 +143,3 @@ def get_datasets(dataset_list: List[AnyStr], tokenizer):
|
||||
evals["anthropic_rlhf"] = eval
|
||||
train = ConcatDataset(train_datasets)
|
||||
return train, evals
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from transformers import AutoModelForSequenceClassification
|
||||
|
||||
model = AutoModelForSequenceClassification.from_pretrained("bigscience/bloomz-560m")
|
||||
freeze_top_n_layers(model, 10)
|
||||
print(model.state_dict().keys())
|
||||
|
||||
Reference in New Issue
Block a user