From 2de17f5ba1a6f77975b6c90caee6df812c424fb7 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Thu, 9 Nov 2023 07:32:24 +0000 Subject: [PATCH] Add doc --- .../accelerate_configs/deepspeed_zero1.yaml | 19 ----------- .../accelerate_configs/deepspeed_zero2.yaml | 21 ------------ scripts/README.md | 32 +++++++++++++++++++ 3 files changed, 32 insertions(+), 40 deletions(-) delete mode 100644 recipes/accelerate_configs/deepspeed_zero1.yaml delete mode 100644 recipes/accelerate_configs/deepspeed_zero2.yaml diff --git a/recipes/accelerate_configs/deepspeed_zero1.yaml b/recipes/accelerate_configs/deepspeed_zero1.yaml deleted file mode 100644 index 1dfeda0..0000000 --- a/recipes/accelerate_configs/deepspeed_zero1.yaml +++ /dev/null @@ -1,19 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -deepspeed_config: - deepspeed_multinode_launcher: standard - zero3_init_flag: false - zero_stage: 1 -distributed_type: DEEPSPEED -downcast_bf16: 'no' -machine_rank: 0 -main_training_function: main -mixed_precision: bf16 -num_machines: 1 -num_processes: 8 -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/recipes/accelerate_configs/deepspeed_zero2.yaml b/recipes/accelerate_configs/deepspeed_zero2.yaml deleted file mode 100644 index 0777900..0000000 --- a/recipes/accelerate_configs/deepspeed_zero2.yaml +++ /dev/null @@ -1,21 +0,0 @@ -compute_environment: LOCAL_MACHINE -debug: false -deepspeed_config: - deepspeed_multinode_launcher: standard - offload_optimizer_device: none - offload_param_device: none - zero3_init_flag: false - zero_stage: 2 -distributed_type: DEEPSPEED -downcast_bf16: 'no' -machine_rank: 0 -main_training_function: main -mixed_precision: bf16 -num_machines: 1 -num_processes: 8 -rdzv_backend: static -same_network: true -tpu_env: [] -tpu_use_cluster: false -tpu_use_sudo: false -use_cpu: false diff --git a/scripts/README.md b/scripts/README.md index de1c109..502f566 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,6 +1,38 @@ ## Supervised Fine-Tuning (SFT) +We provide 3 main ways to train SFT models: + +* Distributed fine-tuning of all model weights with ZeRO-3 +* Fine-tuning with LoRA adapters and ZeRO-3 +* Fine-tuning with QLoRA adapters and DDP + +```shell +# Full training with ZeRO-3 +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_sft.py recipes/{model_name}/sft/config_full.yaml + +# LoRA training with ZeRO-3 +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_sft.py recipes/{model_name}/sft/config_16bit.yaml + +# QLoRA training with DDP +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml scripts/run_sft.py recipes/{model_name}/sft/config_8bit.yaml ``` +You can override the parameters in each YAML config by appending them to the command as follows: + +```shell +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_sft.py recipes/{model_name}/sft/config_full.yaml --per_device_train_batch_size=2 --num_train_epochs=3 +``` + +## Direct Preference Optimisation (DPO) + +```shell +# Full training with ZeRO-3 +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_dpo.py recipes/{model_name}/dpo/config_full.yaml + +# LoRA training with ZeRO-3 +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_dpo.py recipes/{model_name}/dpo/config_16bit.yaml + +# QLoRA training with DDP +ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml scripts/run_dpo.py recipes/{model_name}/dpo/config_8bit.yaml ``` \ No newline at end of file