Make DPO work!

2026-06-27 17:47:01 +08:00 · 2023-11-08 22:58:34 +00:00
parent e54e095978
commit ee10c4efd9
4 changed files with 60 additions and 63 deletions
@@ -1,5 +1,5 @@
 #!/bin/bash
-#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
+#SBATCH --ntasks-per-node=1
 #SBATCH --exclusive
 #SBATCH --gres=gpu:8
 #SBATCH --partition=production-cluster
@@ -14,7 +14,7 @@ echo "START TIME: $(date)"

 MODEL=$1
 TASK=$2
-VERSION=$3
+PRECISION=$3
 ACCELERATOR=$4
 OPTIONAL_ARGS=$5

@@ -23,7 +23,7 @@ NUM_NODES=$SLURM_NNODES
 GPUS_PER_NODE=8
 WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
 # Accelerate's DeepSpeed config and Transformers' TrainingArguments can conflict on gradient accumulation steps, so we parse the value from the config file to ensure the two match.
-CONFIG_FILE=recipes/$MODEL/$TASK/config_$VERSION.yaml
+CONFIG_FILE=recipes/$MODEL/$TASK/config_$PRECISION.yaml
 GRAD_ACC_STEPS=$(yq -r .gradient_accumulation_steps $CONFIG_FILE)

 # Split the string into individual arguments
@@ -69,7 +69,7 @@ export NCCL_ASYNC_ERROR_HANDLING=1
 # export NCCL_NSOCKS_PERTHREAD=1
 # export CUDA_LAUNCH_BLOCKING=1

-# AWS specific
+# Configuration specific to the Hugging Face Compute Cluster - be warned that it may not work on other clusters!
 export NCCL_PROTO=simple
 export RDMAV_FORK_SAFE=1
 export FI_EFA_FORK_SAFE=1
@@ -0,0 +1,37 @@
+# Model arguments
+model_name_or_path: lewtun/zephyr-7b-sft
+
+# Data training arguments
+# For definitions, see: src/h4/training/config.py
+dataset_mixer:
+  HuggingFaceH4/ultrafeedback_binarized: 1.0
+dataset_splits:
+- train_prefs
+- test_prefs
+preprocessing_num_workers: 12
+
+# DPOTrainer arguments
+bf16: true
+beta: 0.1
+do_eval: true
+evaluation_strategy: steps
+eval_steps: 100
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+hub_model_id: zephyr-7b-dpo
+learning_rate: 5.0e-7
+log_level: info
+logging_steps: 10
+lr_scheduler_type: linear
+max_length: 1024
+max_prompt_length: 512
+num_train_epochs: 3
+optim: rmsprop
+output_dir: data/zephyr-7b-dpo
+per_device_train_batch_size: 4
+per_device_eval_batch_size: 4
+push_to_hub: true
+save_strategy: "no"
+save_total_limit: null
+seed: 42
+warmup_ratio: 0.1
@@ -17,6 +17,7 @@ bf16: true
 evaluation_strategy: epoch
 gradient_accumulation_steps: 2
 gradient_checkpointing: true
+hub_model_id: zephyr-7b-sft
 hub_strategy: every_save
 learning_rate: 2.0e-05
 log_level: info
@@ -31,7 +32,6 @@ overwrite_output_dir: true
 per_device_eval_batch_size: 16
 per_device_train_batch_size: 32
 push_to_hub: True
-push_to_hub_model_id: zephyr-7b-sft
 remove_unused_columns: true
 report_to:
 - tensorboard