Make DPO work!

This commit is contained in:
Lewis Tunstall
2023-11-08 22:58:34 +00:00
parent e54e095978
commit ee10c4efd9
4 changed files with 60 additions and 63 deletions
+4 -4
View File
@@ -1,5 +1,5 @@
#!/bin/bash
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
#SBATCH --ntasks-per-node=1
#SBATCH --exclusive
#SBATCH --gres=gpu:8
#SBATCH --partition=production-cluster
@@ -14,7 +14,7 @@ echo "START TIME: $(date)"
MODEL=$1
TASK=$2
VERSION=$3
PRECISION=$3
ACCELERATOR=$4
OPTIONAL_ARGS=$5
@@ -23,7 +23,7 @@ NUM_NODES=$SLURM_NNODES
GPUS_PER_NODE=8
WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
CONFIG_FILE=recipes/$MODEL/$TASK/config_$VERSION.yaml
CONFIG_FILE=recipes/$MODEL/$TASK/config_$PRECISION.yaml
GRAD_ACC_STEPS=$(yq -r .gradient_accumulation_steps $CONFIG_FILE)
# Split the string into individual arguments
@@ -69,7 +69,7 @@ export NCCL_ASYNC_ERROR_HANDLING=1
# export NCCL_NSOCKS_PERTHREAD=1
# export CUDA_LAUNCH_BLOCKING=1
# AWS specific
# Specific configuration for the Hugging Face Compute Cluster - be warned this may not work on other clusters!
export NCCL_PROTO=simple
export RDMAV_FORK_SAFE=1
export FI_EFA_FORK_SAFE=1
+37
View File
@@ -0,0 +1,37 @@
# Model arguments
# Checkpoint to start from (presumably the output of the SFT recipe - confirm).
model_name_or_path: lewtun/zephyr-7b-sft

# Data training arguments
# For definitions, see: src/h4/training/config.py
# `dataset_mixer` is a mapping of dataset name -> value (1.0 here). The entry
# must be indented under the key; at column 0 YAML would parse `dataset_mixer`
# as null and treat the dataset name as an unrelated top-level key.
dataset_mixer:
  HuggingFaceH4/ultrafeedback_binarized: 1.0
dataset_splits:
- train_prefs
- test_prefs
preprocessing_num_workers: 12

# DPOTrainer arguments
bf16: true
beta: 0.1
do_eval: true
# Evaluation is step-based: `eval_steps` gives the interval.
evaluation_strategy: steps
eval_steps: 100
# NOTE(review): the Slurm launch script reads `.gradient_accumulation_steps`
# from a recipe config with yq so that it matches the Accelerate/DeepSpeed
# setup - confirm this file is one of the configs it reads before changing it.
gradient_accumulation_steps: 1
gradient_checkpointing: true
hub_model_id: zephyr-7b-dpo
learning_rate: 5.0e-7
log_level: info
logging_steps: 10
lr_scheduler_type: linear
max_length: 1024
max_prompt_length: 512
num_train_epochs: 3
optim: rmsprop
output_dir: data/zephyr-7b-dpo
per_device_train_batch_size: 4
per_device_eval_batch_size: 4
push_to_hub: true
# Keep the quotes: a bare `no` is read as boolean false by YAML 1.1 parsers,
# whereas this setting is meant to be the string "no".
save_strategy: "no"
save_total_limit: null
seed: 42
warmup_ratio: 0.1
+1 -1
View File
@@ -17,6 +17,7 @@ bf16: true
evaluation_strategy: epoch
gradient_accumulation_steps: 2
gradient_checkpointing: true
hub_model_id: zephyr-7b-sft
hub_strategy: every_save
learning_rate: 2.0e-05
log_level: info
@@ -31,7 +32,6 @@ overwrite_output_dir: true
per_device_eval_batch_size: 16
per_device_train_batch_size: 32
push_to_hub: True
push_to_hub_model_id: zephyr-7b-sft
remove_unused_columns: true
report_to:
- tensorboard