Apply quantization during DPO QLoRA (#115)

* Add QLoRA fix
* Update script
2026-06-27 17:47:01 +08:00 · 2024-02-05 16:50:17 +01:00
parent d00e6f043e
commit 87cc800498
3 changed files with 11 additions and 13 deletions
@@ -1,12 +1,12 @@
 # Model arguments
 model_name_or_path: alignment-handbook/zephyr-7b-sft-qlora
-torch_dtype: float16
+torch_dtype: bfloat16

 # LoRA arguments
 use_peft: true
 load_in_4bit: true
-lora_r: 16
-lora_alpha: 16
+lora_r: 128
+lora_alpha: 128
 lora_dropout: 0.05
 lora_target_modules:
 - q_proj
@@ -32,7 +32,7 @@ beta: 0.01
 do_eval: true
 evaluation_strategy: steps
 eval_steps: 100
-gradient_accumulation_steps: 2
+gradient_accumulation_steps: 4
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
  use_reentrant: false