mirror of
https://github.com/wassname/alignment-handbook.git
synced 2026-06-27 17:47:01 +08:00
Make DPO work!
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash
|
||||
#SBATCH --ntasks-per-node=1 # crucial - only 1 task per dist per node!
|
||||
#SBATCH --ntasks-per-node=1
|
||||
#SBATCH --exclusive
|
||||
#SBATCH --gres=gpu:8
|
||||
#SBATCH --partition=production-cluster
|
||||
@@ -14,7 +14,7 @@ echo "START TIME: $(date)"
|
||||
|
||||
MODEL=$1
|
||||
TASK=$2
|
||||
VERSION=$3
|
||||
PRECISION=$3
|
||||
ACCELERATOR=$4
|
||||
OPTIONAL_ARGS=$5
|
||||
|
||||
@@ -23,7 +23,7 @@ NUM_NODES=$SLURM_NNODES
|
||||
GPUS_PER_NODE=8
|
||||
WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
|
||||
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
|
||||
CONFIG_FILE=recipes/$MODEL/$TASK/config_$VERSION.yaml
|
||||
CONFIG_FILE=recipes/$MODEL/$TASK/config_$PRECISION.yaml
|
||||
GRAD_ACC_STEPS=$(yq -r .gradient_accumulation_steps $CONFIG_FILE)
|
||||
|
||||
# Split the string into individual arguments
|
||||
@@ -69,7 +69,7 @@ export NCCL_ASYNC_ERROR_HANDLING=1
|
||||
# export NCCL_NSOCKS_PERTHREAD=1
|
||||
# export CUDA_LAUNCH_BLOCKING=1
|
||||
|
||||
# AWS specific
|
||||
# Specific configuration for the Hugging Face Compute Cluster - be warned this may not work on other clusters!
|
||||
export NCCL_PROTO=simple
|
||||
export RDMAV_FORK_SAFE=1
|
||||
export FI_EFA_FORK_SAFE=1
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# Model arguments
|
||||
model_name_or_path: lewtun/zephyr-7b-sft
|
||||
|
||||
# Data training arguments
|
||||
# For definitions, see: src/h4/training/config.py
|
||||
dataset_mixer:
|
||||
HuggingFaceH4/ultrafeedback_binarized: 1.0
|
||||
dataset_splits:
|
||||
- train_prefs
|
||||
- test_prefs
|
||||
preprocessing_num_workers: 12
|
||||
|
||||
# DPOTrainer arguments
|
||||
bf16: true
|
||||
beta: 0.1
|
||||
do_eval: true
|
||||
evaluation_strategy: steps
|
||||
eval_steps: 100
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
hub_model_id: zephyr-7b-dpo
|
||||
learning_rate: 5.0e-7
|
||||
log_level: info
|
||||
logging_steps: 10
|
||||
lr_scheduler_type: linear
|
||||
max_length: 1024
|
||||
max_prompt_length: 512
|
||||
num_train_epochs: 3
|
||||
optim: rmsprop
|
||||
output_dir: data/zephyr-7b-dpo
|
||||
per_device_train_batch_size: 4
|
||||
per_device_eval_batch_size: 4
|
||||
push_to_hub: true
|
||||
save_strategy: "no"
|
||||
save_total_limit: null
|
||||
seed: 42
|
||||
warmup_ratio: 0.1
|
||||
@@ -17,6 +17,7 @@ bf16: true
|
||||
evaluation_strategy: epoch
|
||||
gradient_accumulation_steps: 2
|
||||
gradient_checkpointing: true
|
||||
hub_model_id: zephyr-7b-sft
|
||||
hub_strategy: every_save
|
||||
learning_rate: 2.0e-05
|
||||
log_level: info
|
||||
@@ -31,7 +32,6 @@ overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 16
|
||||
per_device_train_batch_size: 32
|
||||
push_to_hub: True
|
||||
push_to_hub_model_id: zephyr-7b-sft
|
||||
remove_unused_columns: true
|
||||
report_to:
|
||||
- tensorboard
|
||||
|
||||
Reference in New Issue
Block a user