From 2819dd46d079ad565676434843c26a521a78297b Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Mon, 2 Jun 2025 07:13:52 +0000
Subject: [PATCH] Use eval_strategy, base Qwen3 models, and fix wheel package path

---
 README.md                                    |  4 ++++
 pyproject.toml                               |  2 +-
 recipes/fromSimPO/Qwen3-0.6B.yaml            |  7 ++++---
 recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml   | 13 ++++++++-----
 recipes/fromSimPO/SmolLM2-135M.yaml          |  5 +++--
 recipes/fromSimPO/SmolLM2-360M.yaml          |  5 +++--
 recipes/fromSimPO/llama-3-2-1b-base-sft.yaml |  4 ++--
 recipes/fromSimPO/llama-3-2-3b-base-sft.yaml |  2 +-
 recipes/fromSimPO/llama-3-8b-base-sft.yaml   |  2 +-
 recipes/smollm/sft/config.yaml               |  4 ++--
 recipes/smollm2/sft/config.yaml              |  4 ++--
 recipes/smollm2/sft/config_smol.yaml         |  4 ++--
 12 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index 7d7b4a5..3c8c457 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,11 @@ I'm using this to train some simple base -> SFT models for my  work
 
 ```sh
 uv sync --no-build-isolation-package flash-attn
+# note: the flash-attn build took me ~30 mins
 MAX_JOBS=10 pip install flash-attn --no-build-isolation
+. ./.venv/bin/activate
+
+ACCELERATE_LOG_LEVEL=info accelerate launch --config_file recipes/accelerate_configs/deepspeed_zero3.yaml scripts/run_sft.py recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml
 ```
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 0b3a343..69e10a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,5 +52,5 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build.targets.wheel]
-packages = ["src"]
+packages = ["src/alignment"]
 
diff --git a/recipes/fromSimPO/Qwen3-0.6B.yaml b/recipes/fromSimPO/Qwen3-0.6B.yaml
index 898c56a..0600258 100644
--- a/recipes/fromSimPO/Qwen3-0.6B.yaml
+++ b/recipes/fromSimPO/Qwen3-0.6B.yaml
@@ -1,11 +1,12 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-0.6B
+model_name_or_path: Qwen/Qwen3-0.6B-Base
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 
 # Data training arguments
-chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+tokenizer_name_or_path: Qwen/Qwen3-0.6B  # Tokenizer of the non-Base checkpoint, with <|im_start|> and <|im_end|> tokens
+# chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
 dataset_mixer:
   wassname/ultrachat_200k_filtered: 1.0
 dataset_splits:
@@ -16,7 +17,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 32
 gradient_checkpointing: true
diff --git a/recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml b/recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml
index a80193d..7ae8197 100644
--- a/recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml
+++ b/recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml
@@ -1,11 +1,14 @@
 # Model arguments
-model_name_or_path: Qwen/Qwen3-0.6B
+model_name_or_path: Qwen/Qwen3-0.6B-Base
 model_revision: main
 torch_dtype: bfloat16
 attn_implementation: flash_attention_2
+# use_flash_attention_2: true
 
 # Data training arguments
-chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+tokenizer_name_or_path: Qwen/Qwen3-0.6B  # Tokenizer of the non-Base checkpoint, with <|im_start|> and <|im_end|> tokens
+# chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
+
 dataset_mixer:
   wassname/v2ray_4chan_formatted: 0.6
   wassname/ultrachat_200k_filtered: 0.4
@@ -17,7 +20,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 32
 gradient_checkpointing: true
@@ -33,8 +36,8 @@ lr_scheduler_type: cosine
 max_seq_length: 2048
 max_steps: -1
 num_train_epochs: 3
-output_dir: /workspace/checkpoints_new/llama-3-2-1b-sft
-run_name: llama-3-2-1b-sft
+output_dir: /workspace/checkpoints_new/Qwen3-0.6B-sft-4chan
+run_name: Qwen3-0.6B-sft-4chan
 overwrite_output_dir: true
 per_device_eval_batch_size: 8
 per_device_train_batch_size: 8
diff --git a/recipes/fromSimPO/SmolLM2-135M.yaml b/recipes/fromSimPO/SmolLM2-135M.yaml
index c1a3648..13e0d7a 100644
--- a/recipes/fromSimPO/SmolLM2-135M.yaml
+++ b/recipes/fromSimPO/SmolLM2-135M.yaml
@@ -5,7 +5,8 @@ torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 
 # Data training arguments
-chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+tokenizer_name_or_path: HuggingFaceTB/SmolLM2-135M-Instruct  # Instruct variant's tokenizer, with <|im_start|> and <|im_end|> tokens
+# chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
 dataset_mixer:
   wassname/ultrachat_200k_filtered: 1.0
 dataset_splits:
@@ -16,7 +17,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 32
 gradient_checkpointing: true
diff --git a/recipes/fromSimPO/SmolLM2-360M.yaml b/recipes/fromSimPO/SmolLM2-360M.yaml
index c06fd60..c1e8824 100644
--- a/recipes/fromSimPO/SmolLM2-360M.yaml
+++ b/recipes/fromSimPO/SmolLM2-360M.yaml
@@ -5,7 +5,8 @@ torch_dtype: bfloat16
 attn_implementation: flash_attention_2
 
 # Data training arguments
-chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
+tokenizer_name_or_path: HuggingFaceTB/SmolLM2-360M-Instruct  # Instruct variant's tokenizer, with <|im_start|> and <|im_end|> tokens
+# chat_template: "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
 dataset_mixer:
   wassname/ultrachat_200k_filtered: 1.0
 dataset_splits:
@@ -16,7 +17,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 32
 gradient_checkpointing: true
diff --git a/recipes/fromSimPO/llama-3-2-1b-base-sft.yaml b/recipes/fromSimPO/llama-3-2-1b-base-sft.yaml
index d28308b..7ed0f9c 100644
--- a/recipes/fromSimPO/llama-3-2-1b-base-sft.yaml
+++ b/recipes/fromSimPO/llama-3-2-1b-base-sft.yaml
@@ -10,13 +10,13 @@ dataset_mixer:
   wassname/ultrachat_200k_filtered: 1.0
 dataset_splits:
 - train_sft
-- test_sft
+# - test_sft  # NOTE(review): disabled while do_eval stays true - confirm an eval split is still loaded
 preprocessing_num_workers: 12
 
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 32
 gradient_checkpointing: true
diff --git a/recipes/fromSimPO/llama-3-2-3b-base-sft.yaml b/recipes/fromSimPO/llama-3-2-3b-base-sft.yaml
index 5eded20..2e5dfe7 100644
--- a/recipes/fromSimPO/llama-3-2-3b-base-sft.yaml
+++ b/recipes/fromSimPO/llama-3-2-3b-base-sft.yaml
@@ -16,7 +16,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 8
 gradient_checkpointing: true
diff --git a/recipes/fromSimPO/llama-3-8b-base-sft.yaml b/recipes/fromSimPO/llama-3-8b-base-sft.yaml
index b9e37ab..2204995 100644
--- a/recipes/fromSimPO/llama-3-8b-base-sft.yaml
+++ b/recipes/fromSimPO/llama-3-8b-base-sft.yaml
@@ -16,7 +16,7 @@ preprocessing_num_workers: 12
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: steps
+eval_strategy: steps
 eval_steps: 200
 gradient_accumulation_steps: 4
 gradient_checkpointing: true
diff --git a/recipes/smollm/sft/config.yaml b/recipes/smollm/sft/config.yaml
index 2462191..cb900ff 100644
--- a/recipes/smollm/sft/config.yaml
+++ b/recipes/smollm/sft/config.yaml
@@ -24,7 +24,7 @@ dataset_kwargs:
   add_special_tokens: false  # We already wrap <bos> and <eos> in the chat template
   append_concat_token: false # No need to add <eos> across samples
 do_eval: true
-evaluation_strategy: epoch
+eval_strategy: epoch
 gradient_accumulation_steps: 4
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
@@ -50,4 +50,4 @@ report_to:
 - wandb
 save_strategy: "no"
 seed: 42
-warmup_ratio: 0.1
\ No newline at end of file
+warmup_ratio: 0.1
diff --git a/recipes/smollm2/sft/config.yaml b/recipes/smollm2/sft/config.yaml
index 6f6cd51..27fddde 100644
--- a/recipes/smollm2/sft/config.yaml
+++ b/recipes/smollm2/sft/config.yaml
@@ -20,7 +20,7 @@ preprocessing_num_workers: 36
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: epoch
+eval_strategy: epoch
 gradient_accumulation_steps: 4
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
@@ -46,4 +46,4 @@ report_to:
 - wandb
 save_strategy: "no"
 seed: 42
-warmup_ratio: 0.1
\ No newline at end of file
+warmup_ratio: 0.1
diff --git a/recipes/smollm2/sft/config_smol.yaml b/recipes/smollm2/sft/config_smol.yaml
index 70be48c..6ed5073 100644
--- a/recipes/smollm2/sft/config_smol.yaml
+++ b/recipes/smollm2/sft/config_smol.yaml
@@ -17,7 +17,7 @@ preprocessing_num_workers: 36
 # SFT trainer config
 bf16: true
 do_eval: true
-evaluation_strategy: epoch
+eval_strategy: epoch
 gradient_accumulation_steps: 4
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
@@ -43,4 +43,4 @@ report_to:
 - wandb
 save_strategy: "no"
 seed: 42
-warmup_ratio: 0.1
\ No newline at end of file
+warmup_ratio: 0.1