tidy

2026-06-30 17:06:08 +08:00 · 2024-01-03 13:25:38 +08:00
parent 7f27b90159
commit 421c3a8ee0
9 changed files with 1002 additions and 13679 deletions
@@ -16,7 +16,33 @@ If yes/yes then it's may be suprising new information. If either is not true, th
 See [main.ipynb](main.ipynb) for the code and results.


-# Results
+# Results using adapter fine tuning
+
+I fine-tuned the model on the first half of each text, then tested it on the second half. I measure how much it learned by the decrease in perplexity. Rows with a high `before` perplexity are unpredictable, and rows with a larger improvement are learnable. Text that is both unpredictable and learnable is not BS.
+
+
+| name                                          |   before |    after | in_training   |   len |   improvement% |   improvement |
+|:----------------------------------------------|---------:|---------:|:--------------|------:|---------------:|--------------:|
+| wikipedia on LK-99                            | 32.219   | 28.8525  | False         |  1038 |     0.104489   |    3.36652    |
+| Theory o. general relativity                  | 26.952   | 24.5425  | True          |  1378 |     0.0894     |    2.40951    |
+| good_ml                                       | 28.3473  | 26.4566  | False         |  1004 |     0.0666997  |    1.89076    |
+| enron_email1                                  | 25.7697  | 24.3904  | True          |   445 |     0.0535253  |    1.37933    |
+| openai_board_ann                              | 15.904   | 15.1736  | False         |  1191 |     0.0459214  |    0.730332   |
+| Schmidhuber 2023 Subjective Novelty, Surprise | 29.615   | 28.4708  | False         |  2654 |     0.0386353  |    1.14418    |
+| email_to_fauci                                | 25.0893  | 24.3714  | False         |  1559 |     0.0286154  |    0.717941   |
+| sokal hoax                                    | 15.9664  | 15.7148  | True          |  2487 |     0.0157617  |    0.251658   |
+| AI gen fake paper                             |  7.63283 |  7.57951 | False         |  2031 |     0.00698672 |    0.0533285  |
+| lorem ipsum                                   |  1.60166 |  1.59538 | True          |   445 |     0.00392053 |    0.00627935 |
+| bad_ml                                        | 13.9061  | 13.8623  | False         |  2345 |     0.00314972 |    0.0438004  |
+| I have a dream                                |  2.12726 |  2.12344 | True          |   848 |     0.00179583 |    0.00382018 |
+
+
+For example, the wikipedia extract `wikipedia on LK-99 ` is unpredictable (high before perplexity) and is learnable (high improvement in perplexity). That makes sense as it's a new topic. In contrast, `lorem ipsum` has a low perplexity, meaning it's predictable or memorizable. That makes sense as this text was likely in the training corpus. The `AI gen fake paper ` has a low perplexity because it's predictable, even though it is new.
+
+
+See more in [01_detection_using_adapter_ft.ipynb](01_detection_using_adapter_ft.ipynb)
+
+# Results using prompting

 When using microsoft/phi-2 we get this amount of perplexity reduction by including a summary of the key learnings

@@ -35,6 +61,8 @@ When using microsoft/phi-2 we get this amount of perplexity reduction by includi

 As you can see, some of these are probably in the training set

+See more in [02_detection_using_tldr_prompt.ipynb](02_detection_using_tldr_prompt.ipynb)
+
 # Citing

 If you like our work and end up using this code for your research, give us a shout-out by citing or acknowledging
@@ -3071,7 +3071,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.0rc1"
+   "version": "3.11.0"
  }
 },
 "nbformat": 4,
@@ -1,668 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "https://github.com/huggingface/peft/blob/main/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "from torch import optim\n",
-    "import lightning as pl\n",
-    "from matplotlib import pyplot as plt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import transformers\n",
-    "from datasets import load_dataset\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig\n",
-    "import numpy as np\n",
-    "from tqdm.auto import tqdm\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "from peft import LoraConfig, get_peft_model, IA3Config"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.style.use('ggplot')\n",
-    "torch.set_float32_matmul_precision('medium')\n",
-    "warnings.filterwarnings(\"ignore\", \".*does not have many workers.*\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
-    "\n",
-    "model_name = \"microsoft/phi-2\"\n",
-    "\n",
-    "# model = AutoModelForCausalLM.from_pretrained(\n",
-    "#     model_name,\n",
-    "#     # max_memory=max_memory,\n",
-    "#     quantization_config=BitsAndBytesConfig(\n",
-    "#         load_in_4bit=True,\n",
-    "#         llm_int8_threshold=6.0,\n",
-    "#         llm_int8_has_fp16_weight=False,\n",
-    "#         bnb_4bit_compute_dtype=torch.float16,\n",
-    "#         bnb_4bit_use_double_quant=True,\n",
-    "#         bnb_4bit_quant_type=\"nf4\",\n",
-    "#     ),\n",
-    "#     torch_dtype=torch.float16,\n",
-    "#     trust_remote_code=True,\n",
-    "# )\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model_name = \"TheBloke/phi-2-GPTQ\"\n",
-    "# model_name = \"microsoft/phi-2\"\n",
-    "\n",
-    "def load_model():\n",
-    "\n",
-    "    # model = AutoModelForCausalLM.from_pretrained(\n",
-    "    #     model_name,\n",
-    "    #     # quantization_config=BitsAndBytesConfig(\n",
-    "    #     #     load_in_4bit=True,\n",
-    "    #     #     llm_int8_threshold=6.0,\n",
-    "    #     #     llm_int8_has_fp16_weight=False,\n",
-    "    #     #     bnb_4bit_compute_dtype=torch.float16,\n",
-    "    #     #     bnb_4bit_use_double_quant=True,\n",
-    "    #     #     bnb_4bit_quant_type=\"nf4\",\n",
-    "    #     # ),\n",
-    "    #     torch_dtype=torch.float16,\n",
-    "    #     trust_remote_code=True,\n",
-    "    # )\n",
-    "\n",
-    "\n",
-    "    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True,)\n",
-    "    config.quantization_config['use_exllama'] = False\n",
-    "    # del config.quantization_config['use_exllama']\n",
-    "    config.quantization_config['disable_exllama'] = True\n",
-    "    model = AutoModelForCausalLM.from_pretrained(\n",
-    "        model_name,\n",
-    "        torch_dtype=torch.bfloat16,\n",
-    "        trust_remote_code=True,\n",
-    "        config=config,\n",
-    "    )\n",
-    "    return model\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
-     ]
-    }
-   ],
-   "source": [
-    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "MAX_LEN = 2000\n",
-    "samples = json.load(open(\"../samples.json\"))\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Helpers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# modified from https://github.dev/huggingface/evaluate/blob/8dfe05784099fb9af55b8e77793205a3b7c86465/measurements/perplexity/perplexity.py#L154\n",
-    "\n",
-    "# from evaluate.measurements.perplexity import Perplexity\n",
-    "import evaluate\n",
-    "from evaluate import logging\n",
-    "from torch.nn import CrossEntropyLoss\n",
-    "\n",
-    "# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)\n",
-    "def perplexity_compute(\n",
-    "    data, model, tokenizer, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None\n",
-    "):\n",
-    "\n",
-    "    if device is not None:\n",
-    "        assert device in [\"gpu\", \"cpu\", \"cuda\"], \"device should be either gpu or cpu.\"\n",
-    "        if device == \"gpu\":\n",
-    "            device = \"cuda\"\n",
-    "    else:\n",
-    "        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
-    "\n",
-    "    # model = AutoModelForCausalLM.from_pretrained(model_id)\n",
-    "    model = model.to(device)\n",
-    "\n",
-    "    # tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
-    "\n",
-    "    # if batch_size > 1 (which generally leads to padding being required), and\n",
-    "    # if there is not an already assigned pad_token, assign an existing\n",
-    "    # special token to also be the padding token\n",
-    "    if tokenizer.pad_token is None and batch_size > 1:\n",
-    "        existing_special_tokens = list(tokenizer.special_tokens_map_extended.values())\n",
-    "        # check that the model already has at least one special token defined\n",
-    "        assert (\n",
-    "            len(existing_special_tokens) > 0\n",
-    "        ), \"If batch_size > 1, model must have at least one special token to use for padding. Please use a different model or set batch_size=1.\"\n",
-    "        # assign one of the special tokens to also be the pad token\n",
-    "        tokenizer.add_special_tokens({\"pad_token\": existing_special_tokens[0]})\n",
-    "\n",
-    "    if add_start_token and max_length:\n",
-    "        # leave room for <BOS> token to be added:\n",
-    "        assert (\n",
-    "            tokenizer.bos_token is not None\n",
-    "        ), \"Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False\"\n",
-    "        max_tokenized_len = max_length - 1\n",
-    "    else:\n",
-    "        max_tokenized_len = max_length\n",
-    "\n",
-    "    encodings = tokenizer(\n",
-    "        data,\n",
-    "        add_special_tokens=False,\n",
-    "        padding=True,\n",
-    "        truncation=True if max_tokenized_len else False,\n",
-    "        max_length=max_tokenized_len,\n",
-    "        return_tensors=\"pt\",\n",
-    "        return_attention_mask=True,\n",
-    "    ).to(device)\n",
-    "\n",
-    "    encoded_texts = encodings[\"input_ids\"]\n",
-    "    attn_masks = encodings[\"attention_mask\"]\n",
-    "\n",
-    "    # check that each input is long enough:\n",
-    "    if add_start_token:\n",
-    "        assert torch.all(torch.ge(attn_masks.sum(1), 1)), \"Each input text must be at least one token long.\"\n",
-    "    else:\n",
-    "        assert torch.all(\n",
-    "            torch.ge(attn_masks.sum(1), 2)\n",
-    "        ), \"When add_start_token=False, each input text must be at least two tokens long. Run with add_start_token=True if inputting strings of only one token, and remove all empty input strings.\"\n",
-    "\n",
-    "    ppls = []\n",
-    "    loss_fct = CrossEntropyLoss(reduction=\"none\")\n",
-    "\n",
-    "    for start_index in logging.tqdm(range(0, len(encoded_texts), batch_size)):\n",
-    "        end_index = min(start_index + batch_size, len(encoded_texts))\n",
-    "        encoded_batch = encoded_texts[start_index:end_index]\n",
-    "        attn_mask = attn_masks[start_index:end_index]\n",
-    "\n",
-    "        if add_start_token:\n",
-    "            bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0)).to(device)\n",
-    "            encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)\n",
-    "            attn_mask = torch.cat(\n",
-    "                [torch.ones(bos_tokens_tensor.size(), dtype=torch.int64).to(device), attn_mask], dim=1\n",
-    "            )\n",
-    "\n",
-    "        labels = encoded_batch\n",
-    "\n",
-    "        with torch.no_grad():\n",
-    "            out_logits = model(encoded_batch, attention_mask=attn_mask).logits\n",
-    "\n",
-    "        shift_logits = out_logits[..., :-1, :].contiguous()\n",
-    "        shift_labels = labels[..., 1:].contiguous()\n",
-    "        shift_attention_mask_batch = attn_mask[..., 1:].contiguous()\n",
-    "\n",
-    "        perplexity_batch = torch.exp(\n",
-    "            (loss_fct(shift_logits.transpose(1, 2), shift_labels) * shift_attention_mask_batch).sum(1)\n",
-    "            / shift_attention_mask_batch.sum(1)\n",
-    "        )\n",
-    "\n",
-    "        ppls += perplexity_batch.tolist()\n",
-    "\n",
-    "    return {\"perplexities\": ppls, \"mean_perplexity\": torch.tensor(ppls).mean()}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch.nn import functional as F\n",
-    "from torch.utils.data import DataLoader, TensorDataset"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Lightning helpers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample = samples[0]\n",
-    "s = sample['text']\n",
-    "first_half = s[:len(s)//2]\n",
-    "second_half = s[len(s)//2:]\n",
-    "\n",
-    "\n",
-    "\n",
-    "def str2xya(s, tokenizer):\n",
-    "    max_len = min(MAX_LEN, len(s))\n",
-    "    input_ids = tokenizer(s, return_tensors=\"pt\")[\"input_ids\"][0].tolist()\n",
-    "\n",
-    "    pad = tokenizer.bos_token_id\n",
-    "    # turn it into a sequence\n",
-    "    Xs = []\n",
-    "    Ys = []\n",
-    "    for i in range(1, len(input_ids)):\n",
-    "        x = input_ids[:i][-max_len:]\n",
-    "        padding = max_len - len(x)\n",
-    "        x = [pad]*padding + x\n",
-    "        \n",
-    "        Xs.append(x)\n",
-    "        Ys.append(input_ids[i:i+1])\n",
-    "\n",
-    "    Xs = torch.tensor(Xs)\n",
-    "    Ys = torch.tensor(Ys)\n",
-    "    attention_masks = torch.stack([(x==pad)*1 for x in Xs])\n",
-    "    return Xs, Ys, attention_masks\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def eval(model, tokenizer, second_half):\n",
-    "    model.eval();\n",
-    "    with torch.no_grad():\n",
-    "        with model.disable_adapter():\n",
-    "            results = perplexity_compute(data=second_half, model=model, tokenizer=tokenizer, device='cuda')\n",
-    "        results2 = perplexity_compute(data=second_half, model=model, tokenizer=tokenizer, device='cuda')\n",
-    "    return dict(before=results['mean_perplexity'].item(), after=results2['mean_perplexity'].item())\n",
-    "\n",
-    "def read_metrics_csv(metrics_file_path):\n",
-    "    df_hist = pd.read_csv(metrics_file_path)\n",
-    "    df_hist[\"epoch\"] = df_hist[\"epoch\"].ffill()\n",
-    "    df_histe = df_hist.set_index(\"epoch\").groupby(\"epoch\").mean()\n",
-    "    return df_histe, df_hist\n",
-    "\n",
-    "\n",
-    "def plot_hist(df_hist, allowlist=None, logy=False):\n",
-    "    \"\"\"plot groups of suffixes together\"\"\"\n",
-    "    suffixes = list(set([c.split('/')[-1] for c in df_hist.columns if '/' in c]))\n",
-    "    for suffix in suffixes:\n",
-    "        if allowlist and suffix not in allowlist: continue\n",
-    "        df_hist[[c for c in df_hist.columns if c.endswith(suffix) and '/' in c]].plot(title=suffix, style='.', logy=logy)\n",
-    "        plt.title(suffix)   \n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "\n",
-    "class PL_MODEL(pl.LightningModule):\n",
-    "    def __init__(self, num_iterations, lr=3e-4, weight_decay=0,):\n",
-    "        super().__init__()\n",
-    "        self.save_hyperparameters()\n",
-    "\n",
-    "    def configure_model(self):\n",
-    "        # instantiate your model in this hook\n",
-    "        peft_config = LoraConfig(\n",
-    "            # task_type=TaskType.TOKEN_CLS, \n",
-    "            target_modules=[ \"fc2\",  \"Wqkv\",],\n",
-    "            inference_mode=False, r=16, lora_alpha=16, \n",
-    "            # lora_dropout=0.1,\n",
-    "            # bias=\"all\"\n",
-    "        )\n",
-    "        self.model = load_model()\n",
-    "        self.model = get_peft_model(self.model, peft_config)\n",
-    "        self.model.config.use_cache = False\n",
-    "    \n",
-    "    def forward(self, **kwargs):\n",
-    "        return self.model(**kwargs)\n",
-    "\n",
-    "    def _shared_step(self, batch, batch_idx, phase='train'):\n",
-    "        input_ids, targets, attention_mask = batch\n",
-    "        # 16, 141\n",
-    "        output = self.forward(input_ids=input_ids, attention_mask=attention_mask)\n",
-    "        loss = F.smooth_l1_loss(output.logits[:, -1], targets)\n",
-    "        self.log(f\"{phase}/loss\", loss, on_epoch=True, on_step=True, prog_bar=True)\n",
-    "        return loss\n",
-    "    \n",
-    "    def training_step(self, batch, batch_idx):\n",
-    "        return self._shared_step(batch, batch_idx, phase='train')\n",
-    "\n",
-    "    def validation_step(self, batch, batch_idx):\n",
-    "        return self._shared_step(batch, batch_idx, phase='val')\n",
-    "    \n",
-    "    def test_step(self, batch, batch_idx, dataloader_idx=0):\n",
-    "        return self._shared_step(batch, batch_idx, phase='test')\n",
-    "    \n",
-    "    def configure_optimizers(self):\n",
-    "        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)\n",
-    "        lr_scheduler = optim.lr_scheduler.OneCycleLR(\n",
-    "            optimizer, self.hparams.lr, total_steps=self.hparams.num_iterations\n",
-    "        )\n",
-    "        return [optimizer], [lr_scheduler]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "AttributeError",
-     "evalue": "'PL_MODEL' object has no attribute 'model'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[13], line 15\u001b[0m\n\u001b[1;32m     12\u001b[0m epoch_steps \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(dl_train)\n\u001b[1;32m     14\u001b[0m pl_model \u001b[38;5;241m=\u001b[39m PL_MODEL(num_iterations\u001b[38;5;241m=\u001b[39mepoch_steps\u001b[38;5;241m*\u001b[39mepochs, lr\u001b[38;5;241m=\u001b[39mlr, weight_decay\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m---> 15\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mpl_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\n\u001b[1;32m     16\u001b[0m \u001b[38;5;66;03m# from lightning.pytorch.plugins import BitsandbytesPrecision\u001b[39;00m\n\u001b[1;32m     17\u001b[0m \u001b[38;5;66;03m# precision = BitsandbytesPrecision(mode=\"nf4-dq\")\u001b[39;00m\n\u001b[1;32m     18\u001b[0m \u001b[38;5;66;03m# precision = BitsandbytesPrecision(mode=\"int8-training\", dtype=torch.float16, ignore_modules={\"lm_head\"})\u001b[39;00m\n\u001b[1;32m     19\u001b[0m trainer \u001b[38;5;241m=\u001b[39m pl\u001b[38;5;241m.\u001b[39mTrainer(\n\u001b[1;32m     20\u001b[0m         max_epochs\u001b[38;5;241m=\u001b[39mepochs,\n\u001b[1;32m     21\u001b[0m         \u001b[38;5;66;03m# precision=\"bf16-mixed\",\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     24\u001b[0m         \u001b[38;5;66;03m# plugins=precision\u001b[39;00m\n\u001b[1;32m     25\u001b[0m     )\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1695\u001b[0m, in \u001b[0;36mModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m   1693\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m modules:\n\u001b[1;32m   1694\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m modules[name]\n\u001b[0;32m-> 1695\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(\u001b[38;5;28mself\u001b[39m)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m object has no attribute \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'PL_MODEL' object has no attribute 'model'"
-     ]
-    },
-    {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "\n",
-    "device = 'cuda'\n",
-    "lr = 4e-3\n",
-    "epochs = 3\n",
-    "accum_steps = 16\n",
-    "batch_size = 1\n",
-    "\n",
-    "Xs, Ys, attention_masks = str2xya(first_half, tokenizer)\n",
-    "dl_train = DataLoader(TensorDataset(Xs, Ys, attention_masks), batch_size=batch_size, shuffle=True)\n",
-    "Xs, Ys, attention_masks = str2xya(second_half, tokenizer)\n",
-    "dl_val = DataLoader(TensorDataset(Xs, Ys, attention_masks), batch_size=batch_size, shuffle=False)\n",
-    "\n",
-    "epoch_steps = len(dl_train)\n",
-    "\n",
-    "pl_model = PL_MODEL(num_iterations=epoch_steps*epochs, lr=lr, weight_decay=0)\n",
-    "model = pl_model.model\n",
-    "# from lightning.pytorch.plugins import BitsandbytesPrecision\n",
-    "# precision = BitsandbytesPrecision(mode=\"nf4-dq\")\n",
-    "# precision = BitsandbytesPrecision(mode=\"int8-training\", dtype=torch.float16, ignore_modules={\"lm_head\"})\n",
-    "trainer = pl.Trainer(\n",
-    "        accelerator='cpu',\n",
-    "        max_epochs=epochs,\n",
-    "        precision='',\n",
-    "        # precision=\"bf16-mixed\",\n",
-    "        log_every_n_steps=1,\n",
-    "        accumulate_grad_batches=accum_steps,\n",
-    "        # plugins=precision\n",
-    "    )\n",
-    "\n",
-    "# train\n",
-    "trainer.fit(pl_model, dl_train, dl_val)\n",
-    "\n",
-    "\n",
-    "df_histe, df_hist = read_metrics_csv(trainer.logger.experiment.metrics_file_path).bfill().ffill()\n",
-    "display(df_hist)\n",
-    "plot_hist(df_hist)\n",
-    "\n",
-    "eval(model, tokenizer, second_half)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch import optim\n",
-    "\n",
-    "\n",
-    "def lora_eval(model, tokenizer, sample):\n",
-    "    # reset/set adapter\n",
-    "    # peft_config = IA3Config(\n",
-    "    #     target_modules=[ \"fc2\",  \"Wqkv\",], \n",
-    "    #         feedforward_modules=[\"fc2\"],\n",
-    "    #         inference_mode=False,\n",
-    "    # )\n",
-    "    peft_config = LoraConfig(\n",
-    "        # task_type=TaskType.TOKEN_CLS, \n",
-    "        target_modules=[ \"fc2\",  \"Wqkv\",],\n",
-    "        inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\"\n",
-    "    )\n",
-    "    model = get_peft_model(model, peft_config)\n",
-    "    model.config.use_cache = False\n",
-    "\n",
-    "    # train adapter\n",
-    "    s = sample['text']\n",
-    "    first_half = s[:len(s)//2]\n",
-    "    second_half = s[len(s)//2:]\n",
-    "    input_ids = tokenizer(first_half, return_tensors=\"pt\")[\"input_ids\"][0].to('cuda')\n",
-    "\n",
-    "    device = 'cuda'\n",
-    "    lr = 1.0e-2\n",
-    "    epochs = 3\n",
-    "    accum_steps = 64\n",
-    "    epoch_steps = (len(input_ids)-1)//accum_steps+1\n",
-    "\n",
-    "    total_steps = epochs * epoch_steps\n",
-    "    optimizer = torch.optim.SGD(model.parameters(), lr=lr)\n",
-    "    scheduler = optim.lr_scheduler.OneCycleLR(\n",
-    "            optimizer, lr, total_steps=total_steps\n",
-    "    )\n",
-    "    model.train()\n",
-    "    model = model.to(device)\n",
-    "    for epoch in range(epochs):\n",
-    "        # TODO: batch\n",
-    "        \n",
-    "        accum = 0\n",
-    "        for i in range(1, len(input_ids)):\n",
-    "            X = input_ids[:i][None, ]\n",
-    "            targets = input_ids[i:i+1][None, ]\n",
-    "            optimizer.zero_grad()\n",
-    "            out = model(input_ids=X, \n",
-    "                        )\n",
-    "            logits = out['logits'][:, -1]\n",
-    "            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))\n",
-    "            loss.backward()\n",
-    "            if accum > accum_steps:\n",
-    "                optimizer.step()\n",
-    "                scheduler.step()\n",
-    "                optimizer.zero_grad()\n",
-    "                accum = 0\n",
-    "            else:\n",
-    "                accum += 1\n",
-    "        if accum > 0:\n",
-    "            optimizer.step()\n",
-    "            scheduler.step()\n",
-    "            optimizer.zero_grad()\n",
-    "\n",
-    "    return eval(model, tokenizer, second_half)\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = []\n",
-    "for sample in tqdm(samples):\n",
-    "    r = lora_eval(model, tokenizer, sample)\n",
-    "    print(sample['name'], r)\n",
-    "    r.update(sample)\n",
-    "    data.append(r)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print('perplexity (on 2nd half) before and after training adapter on first half of text')\n",
-    "df = pd.DataFrame(data).set_index('name')\n",
-    "\n",
-    "df['learning'] = (df['before']-df['after'])/df['before']\n",
-    "df.sort_values('learning').drop(columns=['text', 'url'])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.0rc1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
@@ -1,682 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "https://github.com/huggingface/peft/blob/main/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "from torch import optim\n",
-    "import lightning as pl\n",
-    "from matplotlib import pyplot as plt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "\n",
-    "import torch\n",
-    "import torch.nn as nn\n",
-    "import transformers\n",
-    "from datasets import load_dataset\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoConfig\n",
-    "import numpy as np\n",
-    "from tqdm.auto import tqdm\n",
-    "import pandas as pd\n",
-    "import warnings\n",
-    "from peft import LoraConfig, get_peft_model, IA3Config"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.style.use('ggplot')\n",
-    "torch.set_float32_matmul_precision('medium')\n",
-    "warnings.filterwarnings(\"ignore\", \".*does not have many workers.*\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
-    "\n",
-    "model_name = \"microsoft/phi-2\"\n",
-    "\n",
-    "# model = AutoModelForCausalLM.from_pretrained(\n",
-    "#     model_name,\n",
-    "#     # max_memory=max_memory,\n",
-    "#     quantization_config=BitsAndBytesConfig(\n",
-    "#         load_in_4bit=True,\n",
-    "#         llm_int8_threshold=6.0,\n",
-    "#         llm_int8_has_fp16_weight=False,\n",
-    "#         bnb_4bit_compute_dtype=torch.float16,\n",
-    "#         bnb_4bit_use_double_quant=True,\n",
-    "#         bnb_4bit_quant_type=\"nf4\",\n",
-    "#     ),\n",
-    "#     torch_dtype=torch.float16,\n",
-    "#     trust_remote_code=True,\n",
-    "# )\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model_name = \"microsoft/phi-2\"\n",
-    "\n",
-    "def load_model():\n",
-    "    \"\"\"Load the causal LM named by the module-level `model_name`, without overriding its dtype.\n",
-    "\n",
-    "    NOTE(review): `trust_remote_code=True` runs the modelling code shipped with the checkpoint.\n",
-    "    \"\"\"\n",
-    "    return AutoModelForCausalLM.from_pretrained(\n",
-    "        model_name,\n",
-    "        trust_remote_code=True,\n",
-    "    )\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
-     ]
-    }
-   ],
-   "source": [
-    "tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "\n",
-    "# Upper bound on the number of context tokens kept per training row in str2xya.\n",
-    "MAX_LEN = 2000\n",
-    "\n",
-    "# Read through a context manager so the file handle is closed deterministically.\n",
-    "with open(\"../samples.json\") as f:\n",
-    "    samples = json.load(f)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Helpers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# modified from https://github.dev/huggingface/evaluate/blob/8dfe05784099fb9af55b8e77793205a3b7c86465/measurements/perplexity/perplexity.py#L154\n",
-    "\n",
-    "# from evaluate.measurements.perplexity import Perplexity\n",
-    "import evaluate\n",
-    "from evaluate import logging\n",
-    "from torch.nn import CrossEntropyLoss\n",
-    "\n",
-    "# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)\n",
-    "def perplexity_compute(\n",
-    "    data, model, tokenizer, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None\n",
-    "):\n",
-    "    \"\"\"Compute the perplexity of each text in `data` under `model`.\n",
-    "\n",
-    "    Perplexity is exp of the attention-masked mean next-token cross-entropy of a sequence.\n",
-    "\n",
-    "    Args:\n",
-    "        data: text(s) handed directly to `tokenizer` (presumably a str or list of str; verify against caller).\n",
-    "        model: causal LM; it is moved to `device` before scoring.\n",
-    "        tokenizer: matching tokenizer. If it has no pad token and batch_size > 1, its first existing\n",
-    "            special token is registered as the pad token (the tokenizer is modified).\n",
-    "        batch_size: number of encoded texts scored per forward pass.\n",
-    "        add_start_token: prepend the tokenizer's BOS token to every sequence before scoring.\n",
-    "        device: \"gpu\", \"cuda\" or \"cpu\"; None selects cuda when available, otherwise cpu.\n",
-    "        max_length: optional truncation length; one position is reserved for BOS when add_start_token is set.\n",
-    "\n",
-    "    Returns:\n",
-    "        dict with \"perplexities\" (list of floats, one per text) and \"mean_perplexity\" (0-dim torch tensor).\n",
-    "\n",
-    "    Raises:\n",
-    "        AssertionError: on an unknown device string, a missing special/BOS token when one is needed,\n",
-    "            or an input that is too short to score.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    if device is not None:\n",
-    "        assert device in [\"gpu\", \"cpu\", \"cuda\"], \"device should be either gpu or cpu.\"\n",
-    "        if device == \"gpu\":\n",
-    "            device = \"cuda\"\n",
-    "    else:\n",
-    "        device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
-    "\n",
-    "    # model = AutoModelForCausalLM.from_pretrained(model_id)\n",
-    "    model = model.to(device)\n",
-    "\n",
-    "    # tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
-    "\n",
-    "    # if batch_size > 1 (which generally leads to padding being required), and\n",
-    "    # if there is not an already assigned pad_token, assign an existing\n",
-    "    # special token to also be the padding token\n",
-    "    if tokenizer.pad_token is None and batch_size > 1:\n",
-    "        existing_special_tokens = list(tokenizer.special_tokens_map_extended.values())\n",
-    "        # check that the model already has at least one special token defined\n",
-    "        assert (\n",
-    "            len(existing_special_tokens) > 0\n",
-    "        ), \"If batch_size > 1, model must have at least one special token to use for padding. Please use a different model or set batch_size=1.\"\n",
-    "        # assign one of the special tokens to also be the pad token\n",
-    "        tokenizer.add_special_tokens({\"pad_token\": existing_special_tokens[0]})\n",
-    "\n",
-    "    if add_start_token and max_length:\n",
-    "        # leave room for <BOS> token to be added:\n",
-    "        assert (\n",
-    "            tokenizer.bos_token is not None\n",
-    "        ), \"Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False\"\n",
-    "        max_tokenized_len = max_length - 1\n",
-    "    else:\n",
-    "        max_tokenized_len = max_length\n",
-    "\n",
-    "    # all texts are tokenised and padded in one call; batching below only slices the result\n",
-    "    encodings = tokenizer(\n",
-    "        data,\n",
-    "        add_special_tokens=False,\n",
-    "        padding=True,\n",
-    "        truncation=True if max_tokenized_len else False,\n",
-    "        max_length=max_tokenized_len,\n",
-    "        return_tensors=\"pt\",\n",
-    "        return_attention_mask=True,\n",
-    "    ).to(device)\n",
-    "\n",
-    "    encoded_texts = encodings[\"input_ids\"]\n",
-    "    attn_masks = encodings[\"attention_mask\"]\n",
-    "\n",
-    "    # check that each input is long enough:\n",
-    "    if add_start_token:\n",
-    "        assert torch.all(torch.ge(attn_masks.sum(1), 1)), \"Each input text must be at least one token long.\"\n",
-    "    else:\n",
-    "        assert torch.all(\n",
-    "            torch.ge(attn_masks.sum(1), 2)\n",
-    "        ), \"When add_start_token=False, each input text must be at least two tokens long. Run with add_start_token=True if inputting strings of only one token, and remove all empty input strings.\"\n",
-    "\n",
-    "    ppls = []\n",
-    "    # reduction=\"none\" keeps one loss per token so padded positions can be masked out below\n",
-    "    loss_fct = CrossEntropyLoss(reduction=\"none\")\n",
-    "\n",
-    "    for start_index in logging.tqdm(range(0, len(encoded_texts), batch_size)):\n",
-    "        end_index = min(start_index + batch_size, len(encoded_texts))\n",
-    "        encoded_batch = encoded_texts[start_index:end_index]\n",
-    "        attn_mask = attn_masks[start_index:end_index]\n",
-    "\n",
-    "        if add_start_token:\n",
-    "            # prepend one BOS column (and a matching column of ones in the mask) to every row\n",
-    "            bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0)).to(device)\n",
-    "            encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)\n",
-    "            attn_mask = torch.cat(\n",
-    "                [torch.ones(bos_tokens_tensor.size(), dtype=torch.int64).to(device), attn_mask], dim=1\n",
-    "            )\n",
-    "\n",
-    "        labels = encoded_batch\n",
-    "\n",
-    "        with torch.no_grad():\n",
-    "            out_logits = model(encoded_batch, attention_mask=attn_mask).logits\n",
-    "\n",
-    "        # align predictions with targets: logits at position t are scored against the token at t+1\n",
-    "        shift_logits = out_logits[..., :-1, :].contiguous()\n",
-    "        shift_labels = labels[..., 1:].contiguous()\n",
-    "        shift_attention_mask_batch = attn_mask[..., 1:].contiguous()\n",
-    "\n",
-    "        # per-row masked mean of the token losses, exponentiated\n",
-    "        perplexity_batch = torch.exp(\n",
-    "            (loss_fct(shift_logits.transpose(1, 2), shift_labels) * shift_attention_mask_batch).sum(1)\n",
-    "            / shift_attention_mask_batch.sum(1)\n",
-    "        )\n",
-    "\n",
-    "        ppls += perplexity_batch.tolist()\n",
-    "\n",
-    "    return {\"perplexities\": ppls, \"mean_perplexity\": torch.tensor(ppls).mean()}"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch.nn import functional as F\n",
-    "from torch.utils.data import DataLoader, TensorDataset"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Lightning helpers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sample = samples[0]\n",
-    "s = sample['text']\n",
-    "first_half = s[:len(s)//2]\n",
-    "second_half = s[len(s)//2:]\n",
-    "\n",
-    "\n",
-    "\n",
-    "def str2xya(s, tokenizer):\n",
-    "    \"\"\"Turn one text into left-padded next-token training examples.\n",
-    "\n",
-    "    For every position i >= 1 of the tokenised text, the preceding tokens (at most MAX_LEN of\n",
-    "    them, left-padded with the BOS id to a common width) form the input and token i is the target.\n",
-    "\n",
-    "    Args:\n",
-    "        s: the text to convert.\n",
-    "        tokenizer: tokenizer exposing `bos_token_id`, called as `tokenizer(s, return_tensors=\"pt\")`.\n",
-    "\n",
-    "    Returns:\n",
-    "        Xs: long tensor (n_examples, width) of input ids.\n",
-    "        Ys: long tensor (n_examples, 1) of target ids.\n",
-    "        attention_masks: long tensor (n_examples, width); 1 marks real tokens, 0 marks padding.\n",
-    "    \"\"\"\n",
-    "    input_ids = tokenizer(s, return_tensors=\"pt\")[\"input_ids\"][0].tolist()\n",
-    "    # Width is measured in tokens (the longest context has len(input_ids) - 1 of them), not in\n",
-    "    # characters of `s`, so rows are not padded far beyond the longest real context.\n",
-    "    max_len = max(1, min(MAX_LEN, len(input_ids) - 1))\n",
-    "    pad = tokenizer.bos_token_id\n",
-    "\n",
-    "    Xs, Ys, masks = [], [], []\n",
-    "    for i in range(1, len(input_ids)):\n",
-    "        context = input_ids[max(0, i - max_len):i]\n",
-    "        padding = max_len - len(context)\n",
-    "        Xs.append([pad] * padding + context)\n",
-    "        # Built from the padding count rather than by comparing ids with `pad`, so a genuine\n",
-    "        # token that shares the pad id is still attended to.\n",
-    "        masks.append([0] * padding + [1] * len(context))\n",
-    "        Ys.append([input_ids[i]])\n",
-    "\n",
-    "    # Explicit dtype/shape keeps the result well-formed when the text has fewer than two tokens.\n",
-    "    Xs = torch.tensor(Xs, dtype=torch.long).reshape(-1, max_len)\n",
-    "    Ys = torch.tensor(Ys, dtype=torch.long).reshape(-1, 1)\n",
-    "    attention_masks = torch.tensor(masks, dtype=torch.long).reshape(-1, max_len)\n",
-    "    return Xs, Ys, attention_masks\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def eval(model, tokenizer, second_half):\n",
-    "    \"\"\"Return mean perplexity on `second_half` with the adapter disabled ('before') and enabled ('after').\"\"\"\n",
-    "    model.eval()\n",
-    "\n",
-    "    def mean_ppl():\n",
-    "        scores = perplexity_compute(data=second_half, model=model, tokenizer=tokenizer, device='cuda')\n",
-    "        return scores['mean_perplexity'].item()\n",
-    "\n",
-    "    with torch.no_grad():\n",
-    "        # baseline first: score with the adapter switched off, then again with it active\n",
-    "        with model.disable_adapter():\n",
-    "            before = mean_ppl()\n",
-    "        after = mean_ppl()\n",
-    "    return {'before': before, 'after': after}\n",
-    "\n",
-    "def read_metrics_csv(metrics_file_path):\n",
-    "    \"\"\"Load a metrics CSV; return (per-epoch column means, row-level history with `epoch` forward-filled).\"\"\"\n",
-    "    history = pd.read_csv(metrics_file_path)\n",
-    "    # rows logged without an epoch value inherit the most recent one above them\n",
-    "    history = history.assign(epoch=history[\"epoch\"].ffill())\n",
-    "    per_epoch = history.set_index(\"epoch\").groupby(level=\"epoch\").mean()\n",
-    "    return per_epoch, history\n",
-    "\n",
-    "\n",
-    "def plot_hist(df_hist, allowlist=None, logy=False):\n",
-    "    \"\"\"Draw one figure per metric suffix (the text after the last '/' in a column name).\n",
-    "\n",
-    "    Columns without a '/' are ignored; a non-empty `allowlist` restricts which suffixes are drawn.\n",
-    "    \"\"\"\n",
-    "    slashed = [col for col in df_hist.columns if '/' in col]\n",
-    "    for suffix in set(col.split('/')[-1] for col in slashed):\n",
-    "        if allowlist and suffix not in allowlist:\n",
-    "            continue\n",
-    "        matching = [col for col in slashed if col.endswith(suffix)]\n",
-    "        df_hist[matching].plot(title=suffix, style='.', logy=logy)\n",
-    "        plt.title(suffix)\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import bitsandbytes as bnb\n",
-    "\n",
-    "class PL_MODEL(pl.LightningModule):\n",
-    "    \"\"\"Lightning wrapper that LoRA-tunes the causal LM from `load_model()` on next-token prediction.\n",
-    "\n",
-    "    Batches are `(input_ids, targets, attention_mask)` triples; only the logits at the final\n",
-    "    sequence position are scored against `targets`.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(self, num_iterations, lr=3e-4, weight_decay=0,):\n",
-    "        super().__init__()\n",
-    "        self.save_hyperparameters()\n",
-    "        self.configure_model()\n",
-    "\n",
-    "    def configure_model(self):\n",
-    "        \"\"\"Build the base model and wrap it with LoRA adapters; a no-op once the model exists.\n",
-    "\n",
-    "        `__init__` calls this and the Lightning Trainer calls the hook again when a run starts,\n",
-    "        so it has to be idempotent or the full model would be loaded a second time.\n",
-    "        \"\"\"\n",
-    "        if getattr(self, \"model\", None) is not None:\n",
-    "            return\n",
-    "        peft_config = LoraConfig(\n",
-    "            target_modules=[\"fc2\", \"Wqkv\"],\n",
-    "            inference_mode=False, r=16, lora_alpha=16,\n",
-    "        )\n",
-    "        base_model = load_model()\n",
-    "        self.model = get_peft_model(base_model, peft_config)\n",
-    "        self.model.config.use_cache = False\n",
-    "\n",
-    "    def forward(self, **kwargs):\n",
-    "        \"\"\"Forward keyword arguments unchanged to the wrapped PEFT model.\"\"\"\n",
-    "        return self.model(**kwargs)\n",
-    "\n",
-    "    def _shared_step(self, batch, batch_idx, phase='train'):\n",
-    "        \"\"\"Compute and log the next-token loss for one batch under `{phase}/loss`.\"\"\"\n",
-    "        input_ids, targets, attention_mask = batch\n",
-    "        output = self.forward(input_ids=input_ids, attention_mask=attention_mask)\n",
-    "        # Token ids are class indices, not regression targets: score the final-position logits\n",
-    "        # with cross-entropy over the vocabulary. `targets` arrives as (batch, 1), so flatten it.\n",
-    "        loss = F.cross_entropy(output.logits[:, -1], targets.reshape(-1))\n",
-    "        self.log(f\"{phase}/loss\", loss, on_epoch=True, on_step=True, prog_bar=True)\n",
-    "        return loss\n",
-    "\n",
-    "    def training_step(self, batch, batch_idx):\n",
-    "        return self._shared_step(batch, batch_idx, phase='train')\n",
-    "\n",
-    "    def validation_step(self, batch, batch_idx):\n",
-    "        return self._shared_step(batch, batch_idx, phase='val')\n",
-    "\n",
-    "    def test_step(self, batch, batch_idx, dataloader_idx=0):\n",
-    "        return self._shared_step(batch, batch_idx, phase='test')\n",
-    "\n",
-    "    def configure_optimizers(self):\n",
-    "        \"\"\"Return the bitsandbytes optimizer and a one-cycle LR schedule spanning `num_iterations`.\"\"\"\n",
-    "        # NOTE(review): `hparams.weight_decay` is not forwarded to the optimizer.\n",
-    "        # NOTE(review): the scheduler is returned bare, so Lightning steps it at its default\n",
-    "        # interval; confirm `num_iterations` counts those steps - TODO confirm.\n",
-    "        optimizer = bnb.optim.AdamW4bit(self.parameters(), lr=self.hparams.lr, betas=(0.9, 0.995))\n",
-    "        lr_scheduler = optim.lr_scheduler.OneCycleLR(\n",
-    "            optimizer, self.hparams.lr, total_steps=self.hparams.num_iterations\n",
-    "        )\n",
-    "        return [optimizer], [lr_scheduler]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.10it/s]\n",
-      "Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.\n",
-      "GPU available: True (cuda), used: True\n",
-      "TPU available: False, using: 0 TPU cores\n",
-      "IPU available: False, using: 0 IPUs\n",
-      "HPU available: False, using: 0 HPUs\n"
-     ]
-    },
-    {
-     "ename": "TypeError",
-     "evalue": "Linear4bit.__init__() got an unexpected keyword argument 'dtype'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[18], line 29\u001b[0m\n\u001b[1;32m     18\u001b[0m trainer \u001b[38;5;241m=\u001b[39m pl\u001b[38;5;241m.\u001b[39mTrainer(\n\u001b[1;32m     19\u001b[0m         accelerator\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgpu\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m     20\u001b[0m         max_epochs\u001b[38;5;241m=\u001b[39mepochs,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     25\u001b[0m         plugins\u001b[38;5;241m=\u001b[39mprecision\n\u001b[1;32m     26\u001b[0m     )\n\u001b[1;32m     28\u001b[0m \u001b[38;5;66;03m# train\u001b[39;00m\n\u001b[0;32m---> 29\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpl_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdl_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdl_val\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     31\u001b[0m model \u001b[38;5;241m=\u001b[39m pl_model\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m     33\u001b[0m df_histe, df_hist \u001b[38;5;241m=\u001b[39m read_metrics_csv(trainer\u001b[38;5;241m.\u001b[39mlogger\u001b[38;5;241m.\u001b[39mexperiment\u001b[38;5;241m.\u001b[39mmetrics_file_path)\u001b[38;5;241m.\u001b[39mbfill()\u001b[38;5;241m.\u001b[39mffill()\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/trainer.py:544\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m    542\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m TrainerStatus\u001b[38;5;241m.\u001b[39mRUNNING\n\u001b[1;32m    543\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 544\u001b[0m \u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_and_handle_interrupt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    545\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_impl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\n\u001b[1;32m    546\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/call.py:44\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m     42\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     43\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher\u001b[38;5;241m.\u001b[39mlaunch(trainer_fn, \u001b[38;5;241m*\u001b[39margs, trainer\u001b[38;5;241m=\u001b[39mtrainer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 44\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtrainer_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     46\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m     47\u001b[0m     _call_teardown_hook(trainer)\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/trainer.py:580\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m    573\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    574\u001b[0m ckpt_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkpoint_connector\u001b[38;5;241m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m    575\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn,\n\u001b[1;32m    576\u001b[0m     ckpt_path,\n\u001b[1;32m    577\u001b[0m     model_provided\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m    578\u001b[0m     model_connected\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlightning_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m    579\u001b[0m )\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mckpt_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    582\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstopped\n\u001b[1;32m    583\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/trainer.py:958\u001b[0m, in \u001b[0;36mTrainer._run\u001b[0;34m(self, model, ckpt_path)\u001b[0m\n\u001b[1;32m    955\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkpoint_connector\u001b[38;5;241m.\u001b[39m_restore_modules_and_callbacks(ckpt_path)\n\u001b[1;32m    957\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: configuring model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 958\u001b[0m \u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_configure_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    960\u001b[0m \u001b[38;5;66;03m# reset logger connector\u001b[39;00m\n\u001b[1;32m    961\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_logger_connector\u001b[38;5;241m.\u001b[39mreset_results()\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/call.py:109\u001b[0m, in \u001b[0;36m_call_configure_model\u001b[0;34m(trainer)\u001b[0m\n\u001b[1;32m    107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_overridden(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconfigure_model\u001b[39m\u001b[38;5;124m\"\u001b[39m, trainer\u001b[38;5;241m.\u001b[39mlightning_module):\n\u001b[1;32m    108\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mtensor_init_context(), trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mmodel_sharded_context(), trainer\u001b[38;5;241m.\u001b[39mprecision_plugin\u001b[38;5;241m.\u001b[39mmodule_init_context():  \u001b[38;5;66;03m# noqa: E501\u001b[39;00m\n\u001b[0;32m--> 109\u001b[0m         \u001b[43m_call_lightning_module_hook\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrainer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mconfigure_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/pytorch/trainer/call.py:157\u001b[0m, in \u001b[0;36m_call_lightning_module_hook\u001b[0;34m(trainer, hook_name, pl_module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    154\u001b[0m pl_module\u001b[38;5;241m.\u001b[39m_current_fx_name \u001b[38;5;241m=\u001b[39m hook_name\n\u001b[1;32m    156\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mprofile(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[LightningModule]\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpl_module\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhook_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 157\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    159\u001b[0m \u001b[38;5;66;03m# restore current_fx when nested context\u001b[39;00m\n\u001b[1;32m    160\u001b[0m pl_module\u001b[38;5;241m.\u001b[39m_current_fx_name \u001b[38;5;241m=\u001b[39m prev_fx_name\n",
-      "Cell \u001b[0;32mIn[17], line 18\u001b[0m, in \u001b[0;36mPL_MODEL.configure_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfigure_model\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m     10\u001b[0m     \u001b[38;5;66;03m# instantiate your model in this hook\u001b[39;00m\n\u001b[1;32m     11\u001b[0m     peft_config \u001b[38;5;241m=\u001b[39m LoraConfig(\n\u001b[1;32m     12\u001b[0m         \u001b[38;5;66;03m# task_type=TaskType.TOKEN_CLS, \u001b[39;00m\n\u001b[1;32m     13\u001b[0m         target_modules\u001b[38;5;241m=\u001b[39m[ \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfc2\u001b[39m\u001b[38;5;124m\"\u001b[39m,  \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWqkv\u001b[39m\u001b[38;5;124m\"\u001b[39m,],\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     16\u001b[0m         \u001b[38;5;66;03m# bias=\"all\"\u001b[39;00m\n\u001b[1;32m     17\u001b[0m     )\n\u001b[0;32m---> 18\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     19\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m get_peft_model(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, peft_config)\n\u001b[1;32m     20\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_cache \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
-      "Cell \u001b[0;32mIn[5], line 5\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m()\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_model\u001b[39m():\n\u001b[0;32m----> 5\u001b[0m     model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# torch_dtype=torch.float16,\u001b[39;49;00m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrust_remote_code\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     10\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:560\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m    558\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    559\u001b[0m         \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mregister(config\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m, model_class, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 560\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    561\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    562\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    563\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m    564\u001b[0m     model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/transformers/modeling_utils.py:3085\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   3082\u001b[0m     config \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_check_and_enable_flash_attn_2(config, torch_dtype\u001b[38;5;241m=\u001b[39mtorch_dtype, device_map\u001b[38;5;241m=\u001b[39mdevice_map)\n\u001b[1;32m   3084\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ContextManagers(init_contexts):\n\u001b[0;32m-> 3085\u001b[0m     model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3087\u001b[0m \u001b[38;5;66;03m# Check first if we are `from_pt`\u001b[39;00m\n\u001b[1;32m   3088\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_keep_in_fp32_modules:\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/microsoft/phi-2/d3186761bf5c4409f7679359284066c25ab668ee/modeling_phi.py:933\u001b[0m, in \u001b[0;36mPhiForCausalLM.__init__\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m    930\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, config: PhiConfig) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    931\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(config)\n\u001b[0;32m--> 933\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransformer \u001b[38;5;241m=\u001b[39m \u001b[43mPhiModel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    934\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_head \u001b[38;5;241m=\u001b[39m CausalLMHead(config)\n\u001b[1;32m    935\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloss \u001b[38;5;241m=\u001b[39m CausalLMLoss()\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/microsoft/phi-2/d3186761bf5c4409f7679359284066c25ab668ee/modeling_phi.py:896\u001b[0m, in \u001b[0;36mPhiModel.__init__\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m    893\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(config)\n\u001b[1;32m    895\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membd \u001b[38;5;241m=\u001b[39m Embedding(config)\n\u001b[0;32m--> 896\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mh \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mModuleList(\u001b[43m[\u001b[49m\u001b[43mParallelBlock\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_idx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mrange\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_layer\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[1;32m    897\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgradient_checkpointing \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m    898\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_init()\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/microsoft/phi-2/d3186761bf5c4409f7679359284066c25ab668ee/modeling_phi.py:896\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    893\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(config)\n\u001b[1;32m    895\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39membd \u001b[38;5;241m=\u001b[39m Embedding(config)\n\u001b[0;32m--> 896\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mh \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mModuleList([\u001b[43mParallelBlock\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_idx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(config\u001b[38;5;241m.\u001b[39mn_layer)])\n\u001b[1;32m    897\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgradient_checkpointing \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m    898\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_init()\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/microsoft/phi-2/d3186761bf5c4409f7679359284066c25ab668ee/modeling_phi.py:757\u001b[0m, in \u001b[0;36mParallelBlock.__init__\u001b[0;34m(self, config, block_idx)\u001b[0m\n\u001b[1;32m    754\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresid_dropout \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mDropout(config\u001b[38;5;241m.\u001b[39mresid_pdrop)\n\u001b[1;32m    755\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblock_idx \u001b[38;5;241m=\u001b[39m block_idx\n\u001b[0;32m--> 757\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmixer \u001b[38;5;241m=\u001b[39m \u001b[43mMHA\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer_idx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock_idx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    758\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmlp \u001b[38;5;241m=\u001b[39m MLP(config)\n",
-      "File \u001b[0;32m~/.cache/huggingface/modules/transformers_modules/microsoft/phi-2/d3186761bf5c4409f7679359284066c25ab668ee/modeling_phi.py:562\u001b[0m, in \u001b[0;36mMHA.__init__\u001b[0;34m(self, config, dtype, device, rotary_dim, rotary_base, rotary_scale_base, n_head, n_head_kv, head_dim, bias, causal, softmax_scale, layer_idx, return_residual, checkpointing)\u001b[0m\n\u001b[1;32m    559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m linear_cls \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    560\u001b[0m     linear_cls \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mLinear\n\u001b[0;32m--> 562\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mWqkv \u001b[38;5;241m=\u001b[39m \u001b[43mlinear_cls\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    563\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mout_proj \u001b[38;5;241m=\u001b[39m linear_cls(hidden_size, hidden_size, bias\u001b[38;5;241m=\u001b[39mbias, device\u001b[38;5;241m=\u001b[39mdevice, dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[1;32m    565\u001b[0m \u001b[38;5;66;03m# Attention\u001b[39;00m\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/fabric/plugins/precision/bitsandbytes.py:253\u001b[0m, in \u001b[0;36m_import_bitsandbytes.<locals>._NF4DQLinear.__init__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    252\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 253\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquant_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnf4\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcompress_statistics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/lightning/fabric/plugins/precision/bitsandbytes.py:213\u001b[0m, in \u001b[0;36m_import_bitsandbytes.<locals>._Linear4bit.__init__\u001b[0;34m(self, device, *args, **kwargs)\u001b[0m\n\u001b[1;32m    212\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, device: Optional[_DEVICE] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 213\u001b[0m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    214\u001b[0m     \u001b[38;5;66;03m# if the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up\u001b[39;00m\n\u001b[1;32m    215\u001b[0m     \u001b[38;5;66;03m# filling the device memory with float32 weights which could lead to OOM\u001b[39;00m\n\u001b[1;32m    216\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[38;5;241m0\u001b[39m, device\u001b[38;5;241m=\u001b[39mdevice)\u001b[38;5;241m.\u001b[39mdevice\u001b[38;5;241m.\u001b[39mtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
-      "\u001b[0;31mTypeError\u001b[0m: Linear4bit.__init__() got an unexpected keyword argument 'dtype'"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "    \n",
-    "\n",
-    "\n",
-    "\n",
-    "device = 'cuda'\n",
-    "lr = 4e-3\n",
-    "epochs = 3\n",
-    "accum_steps = 16\n",
-    "batch_size = 2\n",
-    "\n",
-    "Xs, Ys, attention_masks = str2xya(first_half, tokenizer)\n",
-    "dl_train = DataLoader(TensorDataset(Xs, Ys, attention_masks), batch_size=batch_size, shuffle=True)\n",
-    "Xs, Ys, attention_masks = str2xya(second_half, tokenizer)\n",
-    "dl_val = DataLoader(TensorDataset(Xs, Ys, attention_masks), batch_size=batch_size, shuffle=False)\n",
-    "\n",
-    "epoch_steps = len(dl_train)\n",
-    "\n",
-    "pl_model = PL_MODEL(num_iterations=epoch_steps*epochs, lr=lr, weight_decay=0)\n",
-    "from lightning.pytorch.plugins import BitsandbytesPrecision\n",
-    "precision = BitsandbytesPrecision(mode=\"nf4-dq\")\n",
-    "# precision = BitsandbytesPrecision(mode=\"int8-training\", dtype=torch.float16, ignore_modules={\"lm_head\"})\n",
-    "trainer = pl.Trainer(\n",
-    "        accelerator='gpu',\n",
-    "        max_epochs=epochs,\n",
-    "        # precision='',\n",
-    "        # precision=\"bf16-mixed\",\n",
-    "        log_every_n_steps=1,\n",
-    "        accumulate_grad_batches=accum_steps,\n",
-    "        plugins=precision\n",
-    "    )\n",
-    "\n",
-    "# train\n",
-    "trainer.fit(pl_model, dl_train, dl_val)\n",
-    "\n",
-    "model = pl_model.model\n",
-    "\n",
-    "df_histe, df_hist = read_metrics_csv(trainer.logger.experiment.metrics_file_path).bfill().ffill()\n",
-    "display(df_hist)\n",
-    "plot_hist(df_hist)\n",
-    "\n",
-    "eval(model, tokenizer, second_half)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "1/0"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Old"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch import optim\n",
-    "\n",
-    "\n",
-    "def lora_eval(model, tokenizer, sample):\n",
-    "    \"\"\"Train a LoRA adapter on the first half of a text, then evaluate on the second half.\n",
-    "\n",
-    "    The text is split in half by characters. The first half is tokenized and used for\n",
-    "    next-token training, one prefix per target token, with gradients accumulated over\n",
-    "    `accum_steps` targets per optimizer step.\n",
-    "\n",
-    "    Args:\n",
-    "        model: model to wrap with LoRA; the adapter targets its `fc2` and `Wqkv` modules.\n",
-    "        tokenizer: callable returning a mapping with an `input_ids` tensor.\n",
-    "        sample: mapping with a 'text' entry holding the full string.\n",
-    "\n",
-    "    Returns:\n",
-    "        Whatever `eval(model, tokenizer, second_half)` returns for the adapted model.\n",
-    "    \"\"\"\n",
-    "    # NOTE(review): get_peft_model wraps whatever `model` is passed in; if callers pass the same\n",
-    "    # object for every sample, adapters from earlier samples may persist - TODO confirm/reset.\n",
-    "    # reset/set adapter\n",
-    "    # peft_config = IA3Config(\n",
-    "    #     target_modules=[ \"fc2\",  \"Wqkv\",],\n",
-    "    #         feedforward_modules=[\"fc2\"],\n",
-    "    #         inference_mode=False,\n",
-    "    # )\n",
-    "    peft_config = LoraConfig(\n",
-    "        # task_type=TaskType.TOKEN_CLS,\n",
-    "        target_modules=[ \"fc2\",  \"Wqkv\",],\n",
-    "        inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias=\"all\"\n",
-    "    )\n",
-    "    model = get_peft_model(model, peft_config)\n",
-    "    model.config.use_cache = False\n",
-    "\n",
-    "    # train adapter\n",
-    "    device = 'cuda'\n",
-    "    lr = 1.0e-2\n",
-    "    epochs = 3\n",
-    "    accum_steps = 64\n",
-    "\n",
-    "    s = sample['text']\n",
-    "    first_half = s[:len(s)//2]\n",
-    "    second_half = s[len(s)//2:]\n",
-    "    input_ids = tokenizer(first_half, return_tensors=\"pt\")[\"input_ids\"][0].to(device)\n",
-    "\n",
-    "    # One training example per next-token target (positions 1..len-1).\n",
-    "    n_targets = len(input_ids) - 1\n",
-    "    # Optimizer steps per epoch: ceil(n_targets / accum_steps), at least 1 so the\n",
-    "    # scheduler always gets a positive total_steps.\n",
-    "    epoch_steps = max(1, -(-n_targets // accum_steps))\n",
-    "\n",
-    "    total_steps = epochs * epoch_steps\n",
-    "    optimizer = torch.optim.SGD(model.parameters(), lr=lr)\n",
-    "    scheduler = optim.lr_scheduler.OneCycleLR(\n",
-    "            optimizer, lr, total_steps=total_steps\n",
-    "    )\n",
-    "    model.train()\n",
-    "    model = model.to(device)\n",
-    "    for epoch in range(epochs):\n",
-    "        # TODO: batch\n",
-    "        # Gradients are cleared only at window boundaries so they really accumulate.\n",
-    "        optimizer.zero_grad()\n",
-    "        accum = 0\n",
-    "        for i in range(1, len(input_ids)):\n",
-    "            X = input_ids[:i][None, ]\n",
-    "            targets = input_ids[i:i+1][None, ]\n",
-    "            out = model(input_ids=X)\n",
-    "            logits = out['logits'][:, -1]\n",
-    "            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))\n",
-    "            # Scale so a full window contributes the mean gradient; a partial tail\n",
-    "            # window is therefore down-weighted rather than over-weighted.\n",
-    "            (loss / accum_steps).backward()\n",
-    "            accum += 1\n",
-    "            if accum == accum_steps:\n",
-    "                optimizer.step()\n",
-    "                scheduler.step()\n",
-    "                optimizer.zero_grad()\n",
-    "                accum = 0\n",
-    "        # Flush the partial window left at the end of the epoch.\n",
-    "        if accum > 0:\n",
-    "            optimizer.step()\n",
-    "            scheduler.step()\n",
-    "            optimizer.zero_grad()\n",
-    "\n",
-    "    return eval(model, tokenizer, second_half)\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Run lora_eval on every sample and keep one merged record (scores + sample fields) each.\n",
-    "data = list()\n",
-    "for sample in tqdm(samples):\n",
-    "    result = lora_eval(model, tokenizer, sample)\n",
-    "    # Print the raw result before the sample's own fields are merged into it.\n",
-    "    print(sample['name'], result)\n",
-    "    result.update(sample)\n",
-    "    data += [result]\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Tabulate the collected records, indexed by sample name.\n",
-    "print('perplexity (on 2nd half) before and after training adapter on first half of text')\n",
-    "df = pd.DataFrame(data).set_index('name')\n",
-    "\n",
-    "# learning = (before - after) / before, i.e. the relative drop between the two columns.\n",
-    "df['learning'] = df['before'].sub(df['after']).div(df['before'])\n",
-    "\n",
-    "# Show rows ordered by ascending learning, without the text and url columns.\n",
-    "df.sort_values(by='learning', ascending=True).drop(columns=['text', 'url'])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.0rc1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}