mirror of
https://github.com/wassname/SimPO.git
synced 2026-06-27 18:03:02 +08:00
wip
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
|
||||
sft:
|
||||
. ./.venv/bin/activate
|
||||
ACCELERATE_LOG_LEVEL=info accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml scripts/run_simpo.py training_configs/llama-3-2-1b-base-sft.yaml
|
||||
|
||||
|
||||
|
||||
Generated
+3578
File diff suppressed because it is too large
Load Diff
@@ -18,11 +18,11 @@ bf16: true
|
||||
do_eval: true
|
||||
evaluation_strategy: steps
|
||||
eval_steps: 200
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: False
|
||||
hub_model_id: zephyr-7b-sft-full
|
||||
hub_model_id: llama-3-2-1b-sft
|
||||
hub_strategy: every_save
|
||||
learning_rate: 2.0e-05
|
||||
log_level: info
|
||||
@@ -32,11 +32,11 @@ lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_steps: -1
|
||||
num_train_epochs: 1
|
||||
output_dir: /scratch/gpfs/DANQIC/ym0081/checkpoints_new/llama-3-8b-sft
|
||||
run_name: llama-3-8b-sft
|
||||
output_dir: /workspace/checkpoints_new/llama-3-2-1b-sft
|
||||
run_name: llama-3-2-1b-sft
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 8
|
||||
per_device_train_batch_size: 8
|
||||
per_device_eval_batch_size: 32
|
||||
per_device_train_batch_size: 32
|
||||
push_to_hub: false
|
||||
remove_unused_columns: true
|
||||
report_to:
|
||||
|
||||
Reference in New Issue
Block a user