From 9ef5dcbd6971def306e8c21cedd9757ccb8fe219 Mon Sep 17 00:00:00 2001
From: Yu Meng <yumeng5@illinois.edu>
Date: Tue, 9 Jul 2024 14:52:58 -0400
Subject: [PATCH] add v0.2 training script

---
 .../llama-3-8b-instruct-simpo-v2.yaml         | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 training_configs/llama-3-8b-instruct-simpo-v2.yaml

diff --git a/training_configs/llama-3-8b-instruct-simpo-v2.yaml b/training_configs/llama-3-8b-instruct-simpo-v2.yaml
new file mode 100644
index 0000000..3d58fa7
--- /dev/null
+++ b/training_configs/llama-3-8b-instruct-simpo-v2.yaml
@@ -0,0 +1,45 @@
+# Model arguments: which checkpoint to fine-tune and how it is loaded
+model_name_or_path: 'meta-llama/Meta-Llama-3-8B-Instruct'
+use_flash_attention_2: true
+torch_dtype: null  # explicitly left unset in this config
+
+# Data arguments: preprocessing parallelism, splits, and dataset mixture
+preprocessing_num_workers: 12
+dataset_splits:
+- train
+- test
+dataset_mixer:
+  princeton-nlp/llama3-ultrafeedback-armorm: 1.0  # sole dataset; 1.0 is presumably its sampling fraction — confirm
+
+# SimPOTrainer arguments for the v2 recipe (key order kept for easy diffing)
+bf16: true
+beta: 10
+gamma_beta_ratio: 0.3  # presumably gamma / beta, i.e. gamma = 3 at beta = 10 — confirm against SimPOTrainer
+do_eval: true
+evaluation_strategy: steps
+eval_steps: 400
+gradient_accumulation_steps: 16  # with per-device batch 2: 32 samples per device per optimizer step
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: simpo-exps
+learning_rate: 1.0e-6
+log_level: info
+logging_steps: 5
+lr_scheduler_type: cosine
+max_length: 2048
+max_prompt_length: 1800  # keep below max_length (2048)
+num_train_epochs: 1
+optim: adamw_torch
+output_dir: outputs/llama-3-8b-instruct-simpo-v2  # -v2 suffix matches this config's file name
+run_name: llama-3-8b-instruct-simpo-v2  # same suffix, so runs are distinguishable in wandb
+per_device_train_batch_size: 2
+per_device_eval_batch_size: 4
+push_to_hub: false
+save_strategy: steps
+save_steps: 1000000  # NOTE(review): far above eval_steps; step checkpoints look effectively disabled — confirm intended
+report_to:
+- wandb
+save_total_limit: 20
+seed: 42
+warmup_ratio: 0.1