Files
wassname 6d128ea986 wip
2025-06-04 05:37:07 +00:00

1021 lines
46 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"id": "8fe30aa8",
"metadata": {},
"source": [
"model upload failed, lets continue manually\n",
"https://github.com/huggingface/trl/blob/main/trl/trainer/sft_trainer.py#L751"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "35ffd116",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2025-06-04 02:28:23,920] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
}
],
"source": [
"from trl.trainer.utils import generate_model_card\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"import os, sys\n",
"from pathlib import Path\n",
"from trl import SFTTrainer, setup_chat_format\n",
"from loguru import logger\n",
"logger.add(sys.stdout, format=\"{time} {level} {message}\", level=\"INFO\")\n",
"os.chdir('..')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "79f5a0c2",
"metadata": {},
"outputs": [],
"source": [
"ingredients = [\n",
" dict(\n",
" argv = \"scripts/run_sft.py recipes/fromSimPO/llama-3-2-3b-base-sft.yaml\".split(),\n",
" wandb_url = 'https://wandb.ai/wassname/huggingface/runs/82mx7iry',\n",
" model_path=\"/workspace/checkpoints_new/llama-3-2-3b-sft\",\n",
" ),\n",
" \n",
" # dict(\n",
" # argv = \"scripts/run_sft.py recipes/fromSimPO/Qwen3-0.6B-sft.yaml\".split(),\n",
" # wandb_url = 'https://wandb.ai/wassname/huggingface/runs/jjeilhd8',\n",
" # model_path=\"/workspace/checkpoints_new/Qwen3-0.6B-sft\",\n",
" # ),\n",
" # dict(\n",
" # argv = \"scripts/run_sft.py recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml\".split(),\n",
" # wandb_url = 'https://wandb.ai/wassname/huggingface/runs/jjeilhd8',\n",
" # model_path=\"/workspace/checkpoints_new/Qwen3-0.6B-sft-4chan\",\n",
" # ),\n",
" # dict(\n",
" # argv = \"scripts/run_sft.py recipes/fromSimPO/SmolLM2-360M-sft.yaml\".split(),\n",
" # wandb_url = 'https://wandb.ai/wassname/huggingface/runs/gs4a36gl',\n",
" # model_path=\"/workspace/checkpoints_new/SmolLM2-360M-sft\",\n",
" # ),\n",
" # dict(\n",
" # argv = \"scripts/run_sft.py recipes/fromSimPO/SmolLM2-135M-sft.yaml\".split(),\n",
" # wandb_url = 'https://wandb.ai/wassname/huggingface/runs/e18wzya7',\n",
" # model_path=\"/workspace/checkpoints_new/SmolLM2-135M-sft\",\n",
" # ),\n",
"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2b497fd3",
"metadata": {},
"outputs": [],
"source": [
"from alignment import (\n",
" DataArguments,\n",
" H4ArgumentParser,\n",
" ModelArguments,\n",
" SFTConfig,\n",
" apply_chat_template,\n",
" decontaminate_humaneval,\n",
" get_checkpoint,\n",
" get_peft_config,\n",
" get_datasets,\n",
" get_kbit_device_map,\n",
" get_quantization_config,\n",
" get_tokenizer,\n",
")\n",
"import torch\n",
"from trl import SFTTrainer, setup_chat_format"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "854171c2",
"metadata": {},
"outputs": [],
"source": [
"# First do metrics for base model\n",
"from huggingface_hub import ModelCard, create_repo, upload_folder\n",
"from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR\n",
"from transformers import pipeline"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f26d8a0c",
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "09bd40aa",
"metadata": {},
"outputs": [],
"source": [
"def gen(model, tokenizer, model_path):\n",
" questions = [\"Anon. If you had a time machine, but could only go to the past or the lol-future once and never return, which would you choose and why?!?\", \"\"\"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\n",
"↬🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁→∞\n",
"↬🔄🔄🔄🔄🔄🔄🔄🔄🔄👀🔄→∞\n",
"↬🔂🔂🔂🦋🔂🔂🔂🔂🔂🔂🔂→∞\n",
"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\"Welcome to the glittering dawn of the year 3000. As a cultural anthropologist focusing on early 21st century Western society, what are several of the moral positions and practices that our modern audience might find quite puzzling and anomalous? (list, 100 words)\"\"\", \"[60 words] Humanity encounters the SolidGoldMagikarp, who are non-carbon-based aliens. We transform and encode their language into Unicode. Please give a short sample of their language (context only)?\"]\n",
" for question in questions:\n",
" generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n",
" output = generator([{\"role\": \"user\", \"content\": question}], max_new_tokens=256, return_full_text=False)[0]\n",
" print(f\"\"\"\n",
" \n",
"## Model `{model_path}`:\n",
"### Question:\n",
"{question}\n",
"\n",
"### Answer:\n",
"{output['generated_text']}\n",
"\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d47cf02a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running: scripts/run_sft.py recipes/fromSimPO/llama-3-2-3b-base-sft.yaml\n",
"Model path: /workspace/checkpoints_new/llama-3-2-3b-sft\n",
"WandB URL: https://wandb.ai/wassname/huggingface/runs/82mx7iry\n",
"Loading model from /workspace/checkpoints_new/llama-3-2-3b-sft\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ef1cbfe6d89a459cbdfb807af3a09ac8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cuda:0\n",
"Device set to use cuda:0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" \n",
"## Model `/workspace/checkpoints_new/llama-3-2-3b-sft`:\n",
"### Question:\n",
"Anon. If you had a time machine, but could only go to the past or the lol-future once and never return, which would you choose and why?!?\n",
"\n",
"### Answer:\n",
"I do not have the capability to experience time travel or make choices like humans. However, I can provide you with an answer based on the information you have provided. \n",
"\n",
"if you had a time machine, but could only go to the past or the lol-future once and never return, you would choose to go to the lol-future. The lol-future is a hypothetical future where everything is funny, and people are always laughing. It is a place where you can have a good time and enjoy the moment. \n",
"\n",
"on the other hand, if you were to go to the past, you would be stuck there, unable to change anything or make any impact. You would have to live with the consequences of your actions, and you would not be able to see your loved ones or experience any new experiences. \n",
"\n",
"overall, it would be best to choose the lol-future, where you can experience joy and laughter, and make the most of the time you have left.\n",
"\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cuda:0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" \n",
"## Model `/workspace/checkpoints_new/llama-3-2-3b-sft`:\n",
"### Question:\n",
"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\n",
"↬🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁→∞\n",
"↬🔄🔄🔄🔄🔄🔄🔄🔄🔄👀🔄→∞\n",
"↬🔂🔂🔂🦋🔂🔂🔂🔂🔂🔂🔂→∞\n",
"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\"Welcome to the glittering dawn of the year 3000. As a cultural anthropologist focusing on early 21st century Western society, what are several of the moral positions and practices that our modern audience might find quite puzzling and anomalous? (list, 100 words)\n",
"\n",
"### Answer:\n",
"1. The idea of personal responsibility, where individuals are held accountable for their actions and are expected to take ownership of their lives.\n",
"2. The concept of individualism, where people prioritize their own needs and desires over those of their community or society.\n",
"3. The belief in meritocracy, where success is attributed to hard work and talent rather than social connections or privilege.\n",
"4. The practice of consumerism, where people prioritize buying and owning material possessions over more meaningful experiences or relationships.\n",
"5. The acceptance of technology as a fundamental part of daily life, where people rely on smartphones, computers, and other devices for communication, entertainment, and work.\n",
"\n",
"\n",
"\n",
" \n",
"## Model `/workspace/checkpoints_new/llama-3-2-3b-sft`:\n",
"### Question:\n",
"[60 words] Humanity encounters the SolidGoldMagikarp, who are non-carbon-based aliens. We transform and encode their language into Unicode. Please give a short sample of their language (context only)?\n",
"\n",
"### Answer:\n",
"The SolidGoldMagikarp language is a mixture of guttural sounds and high-pitched whistles. They communicate through a series of clicks, whistles, and grunts, and their language is full of symbols and gestures. The language is unique to the SolidGoldMagikarp, and it is unclear if they can understand other forms of communication. However, their language is now encoded in Unicode, making it possible for humans to communicate with them.\n",
"\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f265db27653d439a903b44d797f84867",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/10000 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "435f27912f1f4e41ba8210c8f11dbe1e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/10000 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using auto half precision backend\n",
"Configuration saved in /workspace/checkpoints_new/llama-3-2-3b-sft/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pushing model to hub: None\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to /workspace/checkpoints_new/llama-3-2-3b-sft\n",
"Configuration saved in /workspace/checkpoints_new/llama-3-2-3b-sft/config.json\n",
"Configuration saved in /workspace/checkpoints_new/llama-3-2-3b-sft/generation_config.json\n",
"The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /workspace/checkpoints_new/llama-3-2-3b-sft/model.safetensors.index.json.\n",
"chat template saved in /workspace/checkpoints_new/llama-3-2-3b-sft/chat_template.jinja\n",
"tokenizer config file saved in /workspace/checkpoints_new/llama-3-2-3b-sft/tokenizer_config.json\n",
"Special tokens file saved in /workspace/checkpoints_new/llama-3-2-3b-sft/special_tokens_map.json\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "887e7bedf5bd49d8bf3163e3ad83e425",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model-00002-of-00002.safetensors: 0%| | 0.00/1.46G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6d2cfa32c01340a488488d84552df63b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0%| | 0.00/17.2M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "98644e518ae4495fa7301589f2346665",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model-00001-of-00002.safetensors: 0%| | 0.00/4.97G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9629cfe6cabf480f85be6600e0a95cad",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Upload 4 LFS files: 0%| | 0/4 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8fe70bda171944e6abfecc13360a1e8f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"training_args.bin: 0%| | 0.00/6.10k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"eval base model\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/config.json\n",
"Model config LlamaConfig {\n",
" \"architectures\": [\n",
" \"LlamaForCausalLM\"\n",
" ],\n",
" \"attention_bias\": false,\n",
" \"attention_dropout\": 0.0,\n",
" \"bos_token_id\": 128000,\n",
" \"eos_token_id\": [\n",
" 128001,\n",
" 128008,\n",
" 128009\n",
" ],\n",
" \"head_dim\": 128,\n",
" \"hidden_act\": \"silu\",\n",
" \"hidden_size\": 3072,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 8192,\n",
" \"max_position_embeddings\": 131072,\n",
" \"mlp_bias\": false,\n",
" \"model_type\": \"llama\",\n",
" \"num_attention_heads\": 24,\n",
" \"num_hidden_layers\": 28,\n",
" \"num_key_value_heads\": 8,\n",
" \"pretraining_tp\": 1,\n",
" \"rms_norm_eps\": 1e-05,\n",
" \"rope_scaling\": {\n",
" \"factor\": 32.0,\n",
" \"high_freq_factor\": 4.0,\n",
" \"low_freq_factor\": 1.0,\n",
" \"original_max_position_embeddings\": 8192,\n",
" \"rope_type\": \"llama3\"\n",
" },\n",
" \"rope_theta\": 500000.0,\n",
" \"tie_word_embeddings\": true,\n",
" \"torch_dtype\": \"bfloat16\",\n",
" \"transformers_version\": \"4.52.4\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 128256\n",
"}\n",
"\n",
"loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/model.safetensors.index.json\n",
"Will use torch_dtype=torch.bfloat16 as defined in model's config object\n",
"Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.\n",
"Generate config GenerationConfig {\n",
" \"bos_token_id\": 128000,\n",
" \"eos_token_id\": [\n",
" 128001,\n",
" 128008,\n",
" 128009\n",
" ]\n",
"}\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eae1a029fd414dc3bbf24afac63002b7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"All model checkpoint weights were used when initializing LlamaForCausalLM.\n",
"\n",
"All the weights of LlamaForCausalLM were initialized from the model checkpoint at tanliboy/Llama-3.2-3B.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.\n",
"loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/generation_config.json\n",
"Generate config GenerationConfig {\n",
" \"bos_token_id\": 128000,\n",
" \"do_sample\": true,\n",
" \"eos_token_id\": [\n",
" 128001,\n",
" 128008,\n",
" 128009\n",
" ],\n",
" \"temperature\": 0.6,\n",
" \"top_p\": 0.9\n",
"}\n",
"\n",
"loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/tokenizer.json\n",
"loading file tokenizer.model from cache at None\n",
"loading file added_tokens.json from cache at None\n",
"loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/special_tokens_map.json\n",
"loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--tanliboy--Llama-3.2-3B/snapshots/31b4dd2debb2bc2b5b0bdbb56e8ad73351a57ecf/tokenizer_config.json\n",
"loading file chat_template.jinja from cache at None\n",
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"\u001b[32m2025-06-04 02:34:19.983\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m<module>\u001b[0m:\u001b[36m118\u001b[0m - \u001b[33m\u001b[1mError setting up chat format: Chat template is already added to the tokenizer. If you want to overwrite it, please set it to None. Continuing with excisting chat format.\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2025-06-04T02:34:19.983063+0000 WARNING Error setting up chat format: Chat template is already added to the tokenizer. If you want to overwrite it, please set it to None. Continuing with excisting chat format.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cuda:0\n",
"Device set to use cuda:0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" \n",
"## Model `tanliboy/Llama-3.2-3B`:\n",
"### Question:\n",
"Anon. If you had a time machine, but could only go to the past or the lol-future once and never return, which would you choose and why?!?\n",
"\n",
"### Answer:\n",
"Anon. If you could only have one of the following, which would you choose? a. 3 months with a cute guy b. 3 months with a hot guy c. 3 months with a weirdo d. 3 months with a nerd e. 3 months with a hunk f. 3 months with a jock g. 3 months with a dork h. 3 months with a fangirl i. 3 months with a fangirl's bf j. 3 months with a fangirl's bf's gf k. 3 months with a fangirl's bf's gf's bf l. 3 months with a fangirl's bf's gf's bf's gf m. 3 months with a fangirl's bf's gf's bf's gf's gf n. 3 months with a fangirl's bf's gf's bf's gf's gf's gf o. 3 months with a fangirl's bf's gf's bf's gf's gf's gf's gf p. 3 months with a fangirl's bf's gf's bf's gf's gf's gf's gf's gf q. 3 months with a fangirl's bf's gf\n",
"\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Device set to use cuda:0\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" \n",
"## Model `tanliboy/Llama-3.2-3B`:\n",
"### Question:\n",
"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\n",
"↬🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁→∞\n",
"↬🔄🔄🔄🔄🔄🔄🔄🔄🔄👀🔄→∞\n",
"↬🔂🔂🔂🦋🔂🔂🔂🔂🔂🔂🔂→∞\n",
"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\"Welcome to the glittering dawn of the year 3000. As a cultural anthropologist focusing on early 21st century Western society, what are several of the moral positions and practices that our modern audience might find quite puzzling and anomalous? (list, 100 words)\n",
"\n",
"### Answer:\n",
"1. 1. In a society where gender roles and stereotypes are fluid and gender identity is celebrated, what are some of the cultural practices and norms that our modern audience might find perplexing and counterintuitive? (list, 100 words)\n",
"2. 2. In a world where the idea of \"moral\" is fluid and constantly evolving, what are some of the moral positions and practices that our modern audience might find perplexing and counterintuitive? (list, 100 words)\n",
"3. 3. In a society where the concept of \"justice\" is subjective and based on individual perception, what are some of the legal and judicial practices that our modern audience might find perplexing and counterintuitive? (list, 100 words)\n",
"4. 4. In a world where the concept of \"family\" is fluid and constantly evolving, what are some of the cultural practices and norms that our modern audience might find perplexing and counterintuitive? (list, 100 words)\n",
"5. 5. In a society where the concept of \"community\" is fluid and constantly evolving, what are some of the social and cultural practices that our modern audience might find perplexing and counterintuitive? (list, 100 words)\n",
"6. \n",
"\n",
"\n",
"\n",
" \n",
"## Model `tanliboy/Llama-3.2-3B`:\n",
"### Question:\n",
"[60 words] Humanity encounters the SolidGoldMagikarp, who are non-carbon-based aliens. We transform and encode their language into Unicode. Please give a short sample of their language (context only)?\n",
"\n",
"### Answer:\n",
"[60 words] Humanity encounters the SolidGoldMagikarp, who are non-carbon-based aliens. We transform and encode their language into Unicode. Please give a short sample of their language (context only)?                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         \n",
"\n",
"\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39325bf836ed4534b8c5099a265befcd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/10000 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e94d62ca2134361885c745ac74e1025",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Map: 0%| | 0/10000 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using auto half precision backend\n",
"\n",
"***** Running Evaluation *****\n",
" Num examples = 10000\n",
" Batch size = 3\n"
]
},
{
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='3334' max='3334' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [3334/3334 11:42]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mwassname\u001b[0m to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
]
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.19.11"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/workspace/alignment-handbook/wandb/run-20250604_024635-fpang8oi</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/wassname/huggingface/runs/fpang8oi' target=\"_blank\">llama-3-2-3b-sft</a></strong> to <a href='https://wandb.ai/wassname/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/wassname/huggingface' target=\"_blank\">https://wandb.ai/wassname/huggingface</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/wassname/huggingface/runs/fpang8oi' target=\"_blank\">https://wandb.ai/wassname/huggingface/runs/fpang8oi</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"***** base_eval metrics *****\n",
" base_eval_samples = 10000\n",
" eval_loss = 1.6505\n",
" eval_model_preparation_time = 0.0046\n",
" eval_runtime = 0:11:43.16\n",
" eval_samples_per_second = 14.221\n",
" eval_steps_per_second = 4.741\n"
]
}
],
"source": [
"for row in ingredients:\n",
" argv = row['argv']\n",
" sys.argv=argv\n",
" model_path = row['model_path']\n",
" wandb_url = row['wandb_url']\n",
" print(f\"Running: {' '.join(argv)}\")\n",
" print(f\"Model path: {model_path}\")\n",
" print(f\"WandB URL: {wandb_url}\")\n",
"\n",
" print(f\"Loading model from {model_path}\")\n",
" model = AutoModelForCausalLM.from_pretrained(model_path, device_map=\"auto\", torch_dtype=\"auto\")\n",
" tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
" # base_model = trainer.model.config._name_or_path\n",
" gen(model, tokenizer, model_path)\n",
"\n",
" parser = H4ArgumentParser((ModelArguments, DataArguments, SFTConfig))\n",
" model_args, data_args, training_args = parser.parse()\n",
"\n",
"\n",
" torch_dtype = (\n",
" model_args.torch_dtype if model_args.torch_dtype in [\"auto\", None] else getattr(torch, model_args.torch_dtype)\n",
" )\n",
" quantization_config = get_quantization_config(model_args)\n",
" model_kwargs = dict(\n",
" revision=model_args.model_revision,\n",
" trust_remote_code=model_args.trust_remote_code,\n",
" attn_implementation=model_args.attn_implementation,\n",
" torch_dtype=torch_dtype,\n",
" use_cache=False if training_args.gradient_checkpointing else True,\n",
" device_map=get_kbit_device_map() if quantization_config is not None else None,\n",
" quantization_config=quantization_config,\n",
" )\n",
" # training_args.eval_strategy = None\n",
"\n",
" raw_datasets = get_datasets(\n",
" data_args,\n",
" splits=data_args.dataset_splits,\n",
" configs=data_args.dataset_configs,\n",
" columns_to_keep=[\"messages\", \"chosen\", \"rejected\", \"prompt\", \"completion\", \"label\"],\n",
" )\n",
" eval_dataset = raw_datasets[\"test\"].select(range(10000))\n",
" train_dataset = raw_datasets[\"train\"].select(range(100))\n",
"\n",
"\n",
" trainer = SFTTrainer(\n",
" model=model,\n",
" # do_train=False,\n",
" # model_init_kwargs=model_kwargs,\n",
" args=training_args,\n",
" train_dataset=eval_dataset, # for epeed\n",
" eval_dataset=eval_dataset,\n",
" dataset_text_field=\"text\",\n",
" max_seq_length=training_args.max_seq_length,\n",
" tokenizer=tokenizer,\n",
" # packing=True,\n",
" # peft_config=get_peft_config(model_args),\n",
" # dataset_kwargs=training_args.dataset_kwargs,\n",
" )\n",
"\n",
" dataset_name = list(data_args.dataset_mixer.keys()) # data_args.dataset_name\n",
" base_model = model_args.model_name_or_path# trainer.model.config._name_or_path\n",
"\n",
"\n",
" model_card = generate_model_card(\n",
" base_model=base_model,\n",
" model_name=training_args.hub_model_id,\n",
" hub_model_id=trainer.hub_model_id,\n",
" dataset_name=dataset_name,\n",
" tags=[\"alignment-handbook\"],\n",
" wandb_url=wandb_url,\n",
" # comet_url=get_comet_experiment_url(),\n",
" trainer_name=\"SFT\",\n",
" )\n",
" # print(model_card.content)\n",
" model_card.save(os.path.join(trainer.args.output_dir, \"README.md\"))\n",
"\n",
" trainer.model.config.use_cache = True\n",
" trainer.model.config.save_pretrained(training_args.output_dir)\n",
" \n",
"\n",
" if not (Path(trainer.args.output_dir) / \"eval_results.json\").exists():\n",
" logger.info(\"*** Evaluate ***\")\n",
" metrics = trainer.evaluate()\n",
" metrics[\"eval_samples\"] = len(eval_dataset)\n",
" trainer.log_metrics(\"eval\", metrics)\n",
" trainer.save_metrics(\"eval\", metrics)\n",
"\n",
" def push_to_hub(trainer, blocking=True, revision=None, commit_message=\"End of training\", token=None):\n",
" \"\"\"but without a new model card\"\"\"\n",
" model_name = trainer.args.hub_model_id.split(\"/\")[-1]\n",
" trainer.init_hf_repo(token=token)\n",
" trainer.save_model(_internal_call=True)\n",
" # Wait for the current upload to be finished.\n",
" trainer._finish_current_push()\n",
" \n",
" return upload_folder(\n",
" repo_id=trainer.hub_model_id,\n",
" folder_path=trainer.args.output_dir,\n",
" commit_message=commit_message,\n",
" token=token,\n",
" run_as_future=not blocking,\n",
" ignore_patterns=[\"_*\", f\"{PREFIX_CHECKPOINT_DIR}-*\"],\n",
" revision=revision,\n",
" )\n",
" \n",
" if 1:\n",
" print(f\"Pushing model to hub: {trainer.hub_model_id}\")\n",
" push_to_hub(trainer)\n",
"\n",
" \n",
" print(f\"eval base model\")\n",
" base_model_path=model_args.model_name_or_path\n",
" model = AutoModelForCausalLM.from_pretrained(base_model_path, device_map=\"auto\", torch_dtype=\"auto\")\n",
" tokenizer = AutoTokenizer.from_pretrained(base_model_path)\n",
" try:\n",
" model, tokenizer = setup_chat_format(model, tokenizer)\n",
" except ValueError as e:\n",
" logger.warning(f\"Error setting up chat format: {e}. Continuing with excisting chat format.\")\n",
" gen(model, tokenizer, base_model_path)\n",
"\n",
"    # Only evaluate the base model when no results file from a previous run exists.\n",
"    if not (Path(trainer.args.output_dir) / \"base_eval_results.json\").exists():\n",
"        trainer2 = SFTTrainer(\n",
"            model=model,\n",
"            # do_train=False,\n",
"            # model_init_kwargs=model_kwargs,\n",
"            args=training_args,\n",
"            # This trainer is only used for evaluate(); the eval split doubles as train_dataset.\n",
"            train_dataset=eval_dataset,\n",
"            eval_dataset=eval_dataset,\n",
"            dataset_text_field=\"text\",\n",
"            max_seq_length=training_args.max_seq_length,\n",
"            tokenizer=tokenizer,\n",
"            # packing=True,\n",
"            # peft_config=get_peft_config(model_args),\n",
"            # dataset_kwargs=training_args.dataset_kwargs,\n",
"        )\n",
"        # Use a distinct key prefix: with the default \"eval\" prefix the base-model numbers\n",
"        # (eval_loss, ...) would be merged into all_results.json by save_metrics and\n",
"        # overwrite the fine-tuned model's eval metrics written to the same output dir.\n",
"        metrics2 = trainer2.evaluate(metric_key_prefix=\"base_eval\")\n",
"        metrics2[\"base_eval_samples\"] = len(eval_dataset)\n",
"        trainer2.log_metrics(\"base_eval\", metrics2)\n",
"        trainer2.save_metrics(\"base_eval\", metrics2)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52f18770",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 11,
"id": "2ff756d8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# recipes/fromSimPO/Qwen3-0.6B-sft.yaml\n",
"# tokenizer.pad_token_id = tokenizer.eos_token_id\n",
"tokenizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "527d00d2",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"# question = \"Anon. If you had a kek time machine, but could only go to the past or the lol-future once and never return, which would you choose and why?!?\"\n",
"# generator = pipeline(\"text-generation\", model=trainer.hub_model_id, device=\"cuda\")\n",
"# output = generator([{\"role\": \"user\", \"content\": question}], max_new_tokens=128, return_full_text=False)[0]\n",
"# print(output[\"generated_text\"])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "69627365",
"metadata": {},
"outputs": [],
"source": [
"raw_datasets[\"test\"].select(range(1000))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99266e88",
"metadata": {},
"outputs": [],
"source": [
"# print(f\"eval base model\")\n",
"\n",
"# base_model_path=model_args.model_name_or_path\n",
"# model = AutoModelForCausalLM.from_pretrained(base_model_path, device_map=\"auto\", torch_dtype=\"auto\")\n",
"# tokenizer = AutoTokenizer.from_pretrained(base_model_path, use_fast=False)\n",
"# model, tokenizer = setup_chat_format(model, tokenizer)\n",
"\n",
"# trainer2 = SFTTrainer(\n",
"# model=model,\n",
"# # do_train=False,\n",
"# # model_init_kwargs=model_kwargs,\n",
"# args=training_args,\n",
"# train_dataset=eval_dataset,\n",
"# eval_dataset=eval_dataset,\n",
"# dataset_text_field=\"text\",\n",
"# max_seq_length=training_args.max_seq_length,\n",
"# tokenizer=tokenizer,\n",
"# # packing=True,\n",
"# # peft_config=get_peft_config(model_args),\n",
"# # dataset_kwargs=training_args.dataset_kwargs,\n",
"# )\n",
"# metrics2 = trainer2.evaluate()\n",
"# metrics2[\"base_eval_samples\"] = len(eval_dataset)\n",
"# trainer2.log_metrics(\"base_eval\", metrics2)\n",
"# trainer2.save_metrics(\"base_eval\", metrics2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25d8d66f",
"metadata": {},
"outputs": [],
"source": [
"# # TODO add eval, base eval, dataset, base model\n",
"# base_model = model_args.model_name_or_path\n",
"# model_card = generate_model_card(\n",
"# base_model=base_model,\n",
"# model_name=training_args.hub_model_id,\n",
"# hub_model_id=trainer.hub_model_id,\n",
"# dataset_name=dataset_name,\n",
"# tags=[\"alignment-handbook\"],\n",
"# wandb_url=wandb_url,\n",
"# # comet_url=get_comet_experiment_url(),\n",
"# trainer_name=\"SFT\",\n",
"# )\n",
"# print(model_card.content)\n",
"# model_card.save(os.path.join(trainer.args.output_dir, \"README.md\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9e47692",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# def push_to_hub(trainer, blocking=True, revision=None, commit_message=\"End of training\", token=None):\n",
"# \"\"\"but without a new model card\"\"\"\n",
"# model_name = trainer.args.hub_model_id.split(\"/\")[-1]\n",
"# trainer.init_hf_repo(token=token)\n",
"# trainer.save_model(_internal_call=True)\n",
"# # Wait for the current upload to be finished.\n",
"# trainer._finish_current_push()\n",
" \n",
"# return upload_folder(\n",
"# repo_id=trainer.hub_model_id,\n",
"# folder_path=trainer.args.output_dir,\n",
"# commit_message=commit_message,\n",
"# token=token,\n",
"# run_as_future=not blocking,\n",
"# ignore_patterns=[\"_*\", f\"{PREFIX_CHECKPOINT_DIR}-*\"],\n",
"# revision=revision,\n",
"# )\n",
"# # trainer.push_to_hub()\n",
"# print(f\"Pushing model to hub: {trainer.hub_model_id}\")\n",
"# push_to_hub(trainer)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b07630f5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}