diff --git a/model/reward/instructor/configs/rankgen-t5-base-fp16.yml b/model/reward/instructor/configs/rankgen-t5-base-fp16.yml new file mode 100644 index 00000000..c6f2a5e0 --- /dev/null +++ b/model/reward/instructor/configs/rankgen-t5-base-fp16.yml @@ -0,0 +1,16 @@ +model_name: kalpeshk2011/rankgen-t5-base-all +tokenizer_name: google/t5-v1_1-base +learning_rate: 6e-6 +gradient_checkpointing: false +fp16: true +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +warmup_steps: 600 +freeze_layer: 20 +eval_steps: 200 +save_steps: 500 +max_length: 400 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary diff --git a/model/reward/instructor/configs/rankgen-t5-base.yml b/model/reward/instructor/configs/rankgen-t5-base.yml index 6776ad47..bcb4d613 100644 --- a/model/reward/instructor/configs/rankgen-t5-base.yml +++ b/model/reward/instructor/configs/rankgen-t5-base.yml @@ -1,4 +1,7 @@ model_name: kalpeshk2011/rankgen-t5-base-all +# model_name: kalpeshk2011/rankgen-t5-xl-all +# model_name: kalpeshk2011/rankgen-t5-xl-pg19 +# model_name: kalpeshk2011/rankgen-t5-large-all tokenizer_name: google/t5-v1_1-base learning_rate: 6e-6 gradient_checkpointing: false