mirror of
https://github.com/wassname/alignment-handbook.git
synced 2026-06-27 17:47:01 +08:00
fix: Zephyr LoRA fine-tuning fixed (#139)
Co-authored-by: svbogdanov <sergei@numind.ai>
This commit is contained in:
@@ -23,10 +23,22 @@ ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_con
|
||||
|
||||
## QLoRA training examples
|
||||
|
||||
```shell
|
||||
Train faster with Flash Attention 2 (requires a GPU that supports FA2, e.g. A100 or H100):
|
||||
```````shell
|
||||
# Step 1 - SFT
|
||||
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_sft.py recipes/zephyr-7b-beta/sft/config_qlora.yaml --load_in_4bit=true
|
||||
|
||||
# Step 2 - DPO
|
||||
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_dpo.py recipes/zephyr-7b-beta/dpo/config_qlora.yaml
|
||||
```
|
||||
```````
|
||||
|
||||
P.S. Using Flash Attention also allows you to substantially increase the batch size (2x in my case).
|
||||
|
||||
Train without flash-attention:
|
||||
```````shell
|
||||
# Step 1 - SFT
|
||||
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_sft.py recipes/zephyr-7b-beta/sft/config_qlora.yaml --load_in_4bit=true --use_flash_attention_2=false
|
||||
|
||||
# Step 2 - DPO
|
||||
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/multi_gpu.yaml --num_processes=1 scripts/run_dpo.py recipes/zephyr-7b-beta/dpo/config_qlora.yaml --use_flash_attention_2=false
|
||||
```````
|
||||
@@ -1,6 +1,7 @@
|
||||
# Model arguments
|
||||
model_name_or_path: alignment-handbook/zephyr-7b-sft-qlora
|
||||
torch_dtype: bfloat16
|
||||
use_flash_attention_2: true
|
||||
|
||||
# LoRA arguments
|
||||
use_peft: true
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
# Model arguments
|
||||
model_name_or_path: mistralai/Mistral-7B-v0.1
|
||||
model_revision: main
|
||||
torch_dtype: float16
|
||||
torch_dtype: bfloat16
|
||||
use_flash_attention_2: true
|
||||
|
||||
# LoRA arguments
|
||||
load_in_4bit: true
|
||||
|
||||
Reference in New Issue
Block a user