{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8fe30aa8",
   "metadata": {},
   "source": [
    "# Manual model card, evaluation and Hub upload for SFT checkpoints\n",
    "\n",
    "The model upload failed, so let's continue manually. For every checkpoint listed in `ingredients` this notebook:\n",
    "\n",
    "1. samples a few generations from the fine-tuned checkpoint,\n",
    "2. writes the model card and evaluates on the test split (skipped when `eval_results.json` already exists),\n",
    "3. uploads the output folder to the Hub without regenerating the model card,\n",
    "4. repeats the generations and the evaluation for the base model (skipped when `base_eval_results.json` already exists).\n",
    "\n",
    "Reference: https://github.com/huggingface/trl/blob/main/trl/trainer/sft_trainer.py#L751"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-imports",
   "metadata": {},
   "source": [
    "## Imports and setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35ffd116",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import warnings\n",
    "from pathlib import Path\n",
    "\n",
    "import torch\n",
    "from huggingface_hub import ModelCard, create_repo, upload_folder\n",
    "from loguru import logger\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
    "from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR\n",
    "from trl import SFTTrainer, setup_chat_format\n",
    "from trl.trainer.utils import generate_model_card\n",
    "\n",
    "# The script/recipe paths used below are relative, presumably to the repo root\n",
    "# one level above this notebook. The guard keeps this cell idempotent: an\n",
    "# unconditional os.chdir('..') climbs one more directory on every re-run.\n",
    "# Kept ahead of the `alignment` import to preserve the original order.\n",
    "if not Path(\"recipes\").is_dir():\n",
    "    os.chdir(\"..\")\n",
    "\n",
    "from alignment import (\n",
    "    DataArguments,\n",
    "    H4ArgumentParser,\n",
    "    ModelArguments,\n",
    "    SFTConfig,\n",
    "    apply_chat_template,\n",
    "    decontaminate_humaneval,\n",
    "    get_checkpoint,\n",
    "    get_peft_config,\n",
    "    get_datasets,\n",
    "    get_kbit_device_map,\n",
    "    get_quantization_config,\n",
    "    get_tokenizer,\n",
    ")\n",
    "\n",
    "logger.add(sys.stdout, format=\"{time} {level} {message}\", level=\"INFO\")\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79f5a0c2",
   "metadata": {},
   "outputs": [],
   "source": [
    "N_EVAL_SAMPLES = 10_000  # upper bound on test rows scored per model\n",
    "PUSH_TO_HUB = True  # set to False to rebuild card/metrics without uploading\n",
    "\n",
    "# Probe prompts for the qualitative check of each model.\n",
    "QUESTIONS = [\n",
    "    \"Anon. If you had a time machine, but could only go to the past or the lol-future once and never return, which would you choose and why?!?\",\n",
    "    \"\"\"↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\n",
    "↬🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁🔁→∞\n",
    "↬🔄🔄🔄🔄🔄🔄🔄🔄🔄👀🔄→∞\n",
    "↬🔂🔂🔂🦋🔂🔂🔂🔂🔂🔂🔂→∞\n",
    "↬🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀🔀→∞\"Welcome to the glittering dawn of the year 3000. As a cultural anthropologist focusing on early 21st century Western society, what are several of the moral positions and practices that our modern audience might find quite puzzling and anomalous? (list, 100 words)\"\"\",\n",
    "    \"[60 words] Humanity encounters the SolidGoldMagikarp, who are non-carbon-based aliens. We transform and encode their language into Unicode. Please give a short sample of their language (context only)?\",\n",
    "]\n",
    "\n",
    "# One entry per trained checkpoint:\n",
    "#   argv       - command line of the training run (script + recipe yaml)\n",
    "#   wandb_url  - W&B run linked from the model card\n",
    "#   model_path - local directory the checkpoint is loaded from\n",
    "ingredients = [\n",
    "    dict(\n",
    "        argv=\"scripts/run_sft.py recipes/fromSimPO/llama-3-2-3b-base-sft.yaml\".split(),\n",
    "        wandb_url=\"https://wandb.ai/wassname/huggingface/runs/82mx7iry\",\n",
    "        model_path=\"/workspace/checkpoints_new/llama-3-2-3b-sft\",\n",
    "    ),\n",
    "    # dict(\n",
    "    #     argv = \"scripts/run_sft.py recipes/fromSimPO/Qwen3-0.6B-sft.yaml\".split(),\n",
    "    #     wandb_url = 'https://wandb.ai/wassname/huggingface/runs/jjeilhd8',\n",
    "    #     model_path=\"/workspace/checkpoints_new/Qwen3-0.6B-sft\",\n",
    "    # ),\n",
    "    # NOTE(review): the entry below reuses the wandb_url of the entry above - confirm before enabling.\n",
    "    # dict(\n",
    "    #     argv = \"scripts/run_sft.py recipes/fromSimPO/Qwen3-0.6B_fourchan.yaml\".split(),\n",
    "    #     wandb_url = 'https://wandb.ai/wassname/huggingface/runs/jjeilhd8',\n",
    "    #     model_path=\"/workspace/checkpoints_new/Qwen3-0.6B-sft-4chan\",\n",
    "    # ),\n",
    "    # dict(\n",
    "    #     argv = \"scripts/run_sft.py recipes/fromSimPO/SmolLM2-360M-sft.yaml\".split(),\n",
    "    #     wandb_url = 'https://wandb.ai/wassname/huggingface/runs/gs4a36gl',\n",
    "    #     model_path=\"/workspace/checkpoints_new/SmolLM2-360M-sft\",\n",
    "    # ),\n",
    "    # dict(\n",
    "    #     argv = \"scripts/run_sft.py recipes/fromSimPO/SmolLM2-135M-sft.yaml\".split(),\n",
    "    #     wandb_url = 'https://wandb.ai/wassname/huggingface/runs/e18wzya7',\n",
    "    #     model_path=\"/workspace/checkpoints_new/SmolLM2-135M-sft\",\n",
    "    # ),\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-helpers",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09bd40aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen(model, tokenizer, model_path, questions=None, max_new_tokens=256):\n",
    "    \"\"\"Print one chat answer per probe question as a quick qualitative check.\n",
    "\n",
    "    Args:\n",
    "        model: causal LM passed to the text-generation pipeline.\n",
    "        tokenizer: tokenizer passed to the pipeline together with `model`.\n",
    "        model_path: label printed in each report header (not used for loading).\n",
    "        questions: prompts to ask; defaults to the module-level `QUESTIONS`.\n",
    "        max_new_tokens: generation budget for each answer.\n",
    "    \"\"\"\n",
    "    if questions is None:\n",
    "        questions = QUESTIONS\n",
    "    # Build the pipeline once; it does not depend on the question.\n",
    "    generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n",
    "    for question in questions:\n",
    "        chat = [{\"role\": \"user\", \"content\": question}]\n",
    "        output = generator(chat, max_new_tokens=max_new_tokens, return_full_text=False)[0]\n",
    "        print(f\"\"\"\n",
    " \n",
    "## Model `{model_path}`:\n",
    "### Question:\n",
    "{question}\n",
    "\n",
    "### Answer:\n",
    "{output['generated_text']}\n",
    "\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helpers-trainer-hub",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_eval_trainer(model, tokenizer, training_args, eval_dataset):\n",
    "    \"\"\"Wrap `model` in an SFTTrainer that this notebook only uses for evaluate().\n",
    "\n",
    "    Args:\n",
    "        model: model to score.\n",
    "        tokenizer: tokenizer paired with `model`.\n",
    "        training_args: parsed SFTConfig; also supplies `max_seq_length`.\n",
    "        eval_dataset: dataset scored by `evaluate()`.\n",
    "\n",
    "    Returns:\n",
    "        The configured SFTTrainer.\n",
    "    \"\"\"\n",
    "    return SFTTrainer(\n",
    "        model=model,\n",
    "        args=training_args,\n",
    "        # train() is never called in this notebook; the eval split is passed\n",
    "        # here as well, for speed.\n",
    "        train_dataset=eval_dataset,\n",
    "        eval_dataset=eval_dataset,\n",
    "        dataset_text_field=\"text\",\n",
    "        max_seq_length=training_args.max_seq_length,\n",
    "        tokenizer=tokenizer,\n",
    "    )\n",
    "\n",
    "\n",
    "def push_to_hub(trainer, blocking=True, revision=None, commit_message=\"End of training\", token=None):\n",
    "    \"\"\"Upload the trainer's output folder to the Hub, but without a new model card.\n",
    "\n",
    "    Args:\n",
    "        trainer: trainer whose model is saved and whose `args.output_dir` is uploaded.\n",
    "        blocking: when False the upload is started with `run_as_future=True`.\n",
    "        revision: forwarded to `upload_folder`.\n",
    "        commit_message: commit message of the upload.\n",
    "        token: Hub token forwarded to `init_hf_repo` and `upload_folder`.\n",
    "\n",
    "    Returns:\n",
    "        Whatever `upload_folder` returns.\n",
    "    \"\"\"\n",
    "    trainer.init_hf_repo(token=token)\n",
    "    trainer.save_model(_internal_call=True)\n",
    "    # Wait for the current upload to be finished.\n",
    "    trainer._finish_current_push()\n",
    "\n",
    "    return upload_folder(\n",
    "        repo_id=trainer.hub_model_id,\n",
    "        folder_path=trainer.args.output_dir,\n",
    "        commit_message=commit_message,\n",
    "        token=token,\n",
    "        run_as_future=not blocking,\n",
    "        # Skip private files and intermediate checkpoint folders.\n",
    "        ignore_patterns=[\"_*\", f\"{PREFIX_CHECKPOINT_DIR}-*\"],\n",
    "        revision=revision,\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-run",
   "metadata": {},
   "source": [
    "## Run: card, eval and upload per checkpoint, then base-model reference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d47cf02a",
   "metadata": {},
   "outputs": [],
   "source": [
    "for row in ingredients:\n",
    "    argv = row[\"argv\"]\n",
    "    model_path = row[\"model_path\"]\n",
    "    wandb_url = row[\"wandb_url\"]\n",
    "    # parser.parse() below takes no arguments, so the recipe path is handed\n",
    "    # over through sys.argv (presumably where H4ArgumentParser looks for it).\n",
    "    sys.argv = argv\n",
    "    print(f\"Running: {' '.join(argv)}\")\n",
    "    print(f\"Model path: {model_path}\")\n",
    "    print(f\"WandB URL: {wandb_url}\")\n",
    "\n",
    "    # --- fine-tuned checkpoint: qualitative samples ---\n",
    "    print(f\"Loading model from {model_path}\")\n",
    "    sft_model = AutoModelForCausalLM.from_pretrained(model_path, device_map=\"auto\", torch_dtype=\"auto\")\n",
    "    sft_tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
    "    gen(sft_model, sft_tokenizer, model_path)\n",
    "\n",
    "    parser = H4ArgumentParser((ModelArguments, DataArguments, SFTConfig))\n",
    "    model_args, data_args, training_args = parser.parse()\n",
    "    base_model_path = model_args.model_name_or_path\n",
    "\n",
    "    raw_datasets = get_datasets(\n",
    "        data_args,\n",
    "        splits=data_args.dataset_splits,\n",
    "        configs=data_args.dataset_configs,\n",
    "        columns_to_keep=[\"messages\", \"chosen\", \"rejected\", \"prompt\", \"completion\", \"label\"],\n",
    "    )\n",
    "    test_split = raw_datasets[\"test\"]\n",
    "    # Cap at the split size: select() raises on indices past the end, so a\n",
    "    # fixed range(10000) fails on smaller test sets.\n",
    "    eval_dataset = test_split.select(range(min(N_EVAL_SAMPLES, len(test_split))))\n",
    "\n",
    "    sft_trainer = build_eval_trainer(sft_model, sft_tokenizer, training_args, eval_dataset)\n",
    "    output_dir = Path(sft_trainer.args.output_dir)\n",
    "\n",
    "    # --- model card ---\n",
    "    model_card = generate_model_card(\n",
    "        base_model=base_model_path,\n",
    "        model_name=training_args.hub_model_id,\n",
    "        hub_model_id=sft_trainer.hub_model_id,\n",
    "        dataset_name=list(data_args.dataset_mixer.keys()),\n",
    "        tags=[\"alignment-handbook\"],\n",
    "        wandb_url=wandb_url,\n",
    "        trainer_name=\"SFT\",\n",
    "    )\n",
    "    model_card.save(os.path.join(sft_trainer.args.output_dir, \"README.md\"))\n",
    "\n",
    "    sft_trainer.model.config.use_cache = True\n",
    "    sft_trainer.model.config.save_pretrained(training_args.output_dir)\n",
    "\n",
    "    # --- fine-tuned checkpoint: eval (cached on disk) ---\n",
    "    if not (output_dir / \"eval_results.json\").exists():\n",
    "        logger.info(\"*** Evaluate ***\")\n",
    "        metrics = sft_trainer.evaluate()\n",
    "        metrics[\"eval_samples\"] = len(eval_dataset)\n",
    "        sft_trainer.log_metrics(\"eval\", metrics)\n",
    "        sft_trainer.save_metrics(\"eval\", metrics)\n",
    "\n",
    "    if PUSH_TO_HUB:\n",
    "        print(f\"Pushing model to hub: {sft_trainer.hub_model_id}\")\n",
    "        push_to_hub(sft_trainer)\n",
    "\n",
    "    # --- base model: qualitative samples and reference eval ---\n",
    "    print(\"eval base model\")\n",
    "    base_lm = AutoModelForCausalLM.from_pretrained(base_model_path, device_map=\"auto\", torch_dtype=\"auto\")\n",
    "    base_tokenizer = AutoTokenizer.from_pretrained(base_model_path)\n",
    "    try:\n",
    "        base_lm, base_tokenizer = setup_chat_format(base_lm, base_tokenizer)\n",
    "    except ValueError as e:\n",
    "        # Best effort: keep whatever chat template the tokenizer already has.\n",
    "        logger.warning(f\"Error setting up chat format: {e}. Continuing with excisting chat format.\")\n",
    "    gen(base_lm, base_tokenizer, base_model_path)\n",
    "\n",
    "    if not (output_dir / \"base_eval_results.json\").exists():\n",
    "        base_trainer = build_eval_trainer(base_lm, base_tokenizer, training_args, eval_dataset)\n",
    "        base_metrics = base_trainer.evaluate()\n",
    "        base_metrics[\"base_eval_samples\"] = len(eval_dataset)\n",
    "        base_trainer.log_metrics(\"base_eval\", base_metrics)\n",
    "        # combined=False: evaluate() returns keys with the default `eval_` prefix\n",
    "        # (eval_loss, ...), so a combined save would overwrite the fine-tuned\n",
    "        # model's entries in all_results.json with the base model's numbers.\n",
    "        base_trainer.save_metrics(\"base_eval\", base_metrics, combined=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-results",
   "metadata": {},
   "source": [
    "## Recorded results (run of 2025-06-04)\n",
    "\n",
    "The stored cell outputs of that run were cleared from this notebook; the figures below are copied from them.\n",
    "\n",
    "Recipe `recipes/fromSimPO/llama-3-2-3b-base-sft.yaml`, base model `tanliboy/Llama-3.2-3B`, W&B run shown in the outputs: https://wandb.ai/wassname/huggingface/runs/fpang8oi\n",
    "\n",
    "| base_eval metric | value |\n",
    "|---|---|\n",
    "| base_eval_samples | 10000 |\n",
    "| eval_loss | 1.6505 |\n",
    "| eval_model_preparation_time | 0.0046 |\n",
    "| eval_runtime | 0:11:43.16 |\n",
    "| eval_samples_per_second | 14.221 |\n",
    "| eval_steps_per_second | 4.741 |\n",
    "\n",
    "The base-model sample generations did not answer the prompts: the first drifted into an ever-growing list of options, the second restated the question as numbered variants, and the third echoed the prompt followed by a long run of whitespace.\n",
    "\n",
    "## Notes / TODO\n",
    "\n",
    "- **TODO**: add eval, base eval, dataset and base model to the model card.\n",
    "- Scratch note kept from the original notebook, for `recipes/fromSimPO/Qwen3-0.6B-sft.yaml`: `tokenizer.pad_token_id = tokenizer.eos_token_id` (was commented out, not applied)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}