This commit is contained in:
wassname
2025-06-02 22:31:52 +00:00
parent 880d4eda1e
commit 097e4e0b01
9 changed files with 528 additions and 31 deletions
+3 -3
View File
@@ -19,7 +19,7 @@ bf16: true
do_eval: true
eval_strategy: steps
eval_steps: 200
-gradient_accumulation_steps: 32
+gradient_accumulation_steps: 16
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: False
@@ -36,8 +36,8 @@ num_train_epochs: 3
output_dir: /workspace/checkpoints_new/Qwen3-0.6B-sft
run_name: Qwen3-0.6B-sft
overwrite_output_dir: true
-per_device_eval_batch_size: 8
-per_device_train_batch_size: 8
+per_device_eval_batch_size: 16
+per_device_train_batch_size: 16
push_to_hub: false
remove_unused_columns: true
report_to:
+3 -3
View File
@@ -22,7 +22,7 @@ bf16: true
do_eval: true
eval_strategy: steps
eval_steps: 200
-gradient_accumulation_steps: 32
+gradient_accumulation_steps: 16
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: False
@@ -39,8 +39,8 @@ num_train_epochs: 3
output_dir: /workspace/checkpoints_new/Qwen3-0.6B-sft-4chan
run_name: Qwen3-0.6B-sft-4chan
overwrite_output_dir: true
-per_device_eval_batch_size: 8
-per_device_train_batch_size: 8
+per_device_eval_batch_size: 16
+per_device_train_batch_size: 16
push_to_hub: false
remove_unused_columns: true
report_to:
+3 -3
View File
@@ -19,7 +19,7 @@ bf16: true
do_eval: true
eval_strategy: steps
eval_steps: 200
-gradient_accumulation_steps: 32
+gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: False
@@ -36,8 +36,8 @@ num_train_epochs: 3
output_dir: /workspace/checkpoints_new/SmolLM2-135M-sft
run_name: SmolLM2-135M-sft
overwrite_output_dir: true
-per_device_eval_batch_size: 8
-per_device_train_batch_size: 8
+per_device_eval_batch_size: 32
+per_device_train_batch_size: 32
push_to_hub: false
remove_unused_columns: true
report_to:
+3 -3
View File
@@ -19,7 +19,7 @@ bf16: true
do_eval: true
eval_strategy: steps
eval_steps: 200
-gradient_accumulation_steps: 32
+gradient_accumulation_steps: 16
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: False
@@ -36,8 +36,8 @@ num_train_epochs: 3
output_dir: /workspace/checkpoints_new/SmolLM2-360M-sft
run_name: SmolLM2-360M-sft
overwrite_output_dir: true
-per_device_eval_batch_size: 8
-per_device_train_batch_size: 8
+per_device_eval_batch_size: 16
+per_device_train_batch_size: 16
push_to_hub: false
remove_unused_columns: true
report_to: