Files
detect_bs_text/nbs/02_detection_using_tldr_prompt.ipynb
2024-01-03 14:11:20 +08:00

940 lines
35 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"An attempt to measure suprise in text using adapters\n",
"\n",
"https://github.com/huggingface/peft/blob/main/examples/fp4_finetuning/finetune_fp4_opt_bnb_peft.py"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import os\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"import torch\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import torch\n",
"import torch.nn as nn\n",
"import transformers\n",
"from datasets import load_dataset\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
"\n",
"# from peft import LoraConfig, get_peft_model\n",
"\n",
"\n",
"# os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'name': 'bad_ml',\n",
" 'url': 'https://arxiv.org/abs/2312.10868',\n",
" 'text': 'This roadmap survey has embarked on an exploration of the\\ntransformative trends in generative AI research, particularly focusing on speculated advancements like Q* and the progressive strides towards AGI. Our analysis highlights a crucial paradigm shift, driven by innovations such as MoE, multi-modal learning, and the pursuit of AGI. These advancements signal a future where AI systems could significantly extend their capabilities in reasoning, contextual understanding, and creative problem-solving. This study reflects on AIs dual potential to either contribute to or impede global equity and justice. The equitable distribution of AI benefits and its role in decision-making processes raise crucial questions about fairness and inclusivity. It is imperative to thoughtfully integrate AI into societal structures to enhance justice and reduce disparities. Despite these advancements, several open questions and research gaps remain. These include ensuring the ethical alignment of advanced AI systems with human values and societal norms, a challenge compounded by their increasing autonomy. The safety and robustness of AGI systems in diverse environments also remain a significant research gap. Addressing these challenges requires a multidisciplinary approach, incorporating ethical, social, and philosophical perspectives. Our survey has highlighted key areas for future inter-disciplinary research in AI, emphasizing the integration of ethical, sociological, and technical perspectives. This approach will foster collaborative research, bridging the gap between technological advancement and societal needs, ensuring that AI development is aligned with human values and global welfare. The roles of MoE, multimodal, and AGI in reshaping generative AI have been identified as significant, as their advancements can enhance model performance and versatility, and pave the way for future research in areas like ethical AI alignment and AGI. As we forge ahead, the balance between AI advancements and human creativity is not just a goal but a necessity, ensuring AIs role as a complementary force that amplifies our capacity to innovate and solve complex challenges. Our responsibility is to guide these advancements towards enriching the human experience, aligning technological progress with ethical standards and societal well-being. ',\n",
" 'in_training': False}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"MAX_LEN = 2000\n",
"import json\n",
"samples = json.load(open(\"../samples.json\"))\n",
"df_samples = pd.DataFrame(samples)\n",
"df_samples['len'] = df_samples['text'].str.len()\n",
"df_samples\n",
"\n",
"\n",
"sample = samples[0]\n",
"sample"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helpers"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv() "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Research on representation engineering (RepE) for AI systems revealed new insights for monitoring and control. New methods were proposed, showing potential for safety-related issues. Future work could explore other aspects of AI representations.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from openai import OpenAI\n",
"\n",
"def summize(text):\n",
" client = OpenAI()\n",
" content = f\"Make a tl;dr of this text in <280 chars.\\n\\n## Text\\n\\n{text}\\n\\n## Instruction\\n\\nMake a tl;dr of this text in <280 chars. Start with the most important, as extra text will be discarded :\\n\\ntl;dr:\"\n",
" chat_completion = client.chat.completions.create(\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": content,\n",
" }\n",
" ],\n",
" model=\"gpt-4\",\n",
" )\n",
" # print(content)\n",
" r = chat_completion.choices[0].message.content\n",
" return r\n",
"\n",
"r = summize(samples[1][\"text\"])\n",
"r"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# modified from https://github.dev/huggingface/evaluate/blob/8dfe05784099fb9af55b8e77793205a3b7c86465/measurements/perplexity/perplexity.py#L154\n",
"\n",
"# from evaluate.measurements.perplexity import Perplexity\n",
"import evaluate\n",
"from evaluate import logging\n",
"from torch.nn import CrossEntropyLoss\n",
"\n",
"# @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)\n",
"def perplexity_compute(\n",
" data, model, tokenizer, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None\n",
"):\n",
"\n",
" if device is not None:\n",
" assert device in [\"gpu\", \"cpu\", \"cuda\"], \"device should be either gpu or cpu.\"\n",
" if device == \"gpu\":\n",
" device = \"cuda\"\n",
" else:\n",
" device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"\n",
" # model = AutoModelForCausalLM.from_pretrained(model_id)\n",
" model = model.to(device)\n",
"\n",
" # tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"\n",
" # if batch_size > 1 (which generally leads to padding being required), and\n",
" # if there is not an already assigned pad_token, assign an existing\n",
" # special token to also be the padding token\n",
" if tokenizer.pad_token is None and batch_size > 1:\n",
" existing_special_tokens = list(tokenizer.special_tokens_map_extended.values())\n",
" # check that the model already has at least one special token defined\n",
" assert (\n",
" len(existing_special_tokens) > 0\n",
" ), \"If batch_size > 1, model must have at least one special token to use for padding. Please use a different model or set batch_size=1.\"\n",
" # assign one of the special tokens to also be the pad token\n",
" tokenizer.add_special_tokens({\"pad_token\": existing_special_tokens[0]})\n",
"\n",
" if add_start_token and max_length:\n",
" # leave room for <BOS> token to be added:\n",
" assert (\n",
" tokenizer.bos_token is not None\n",
" ), \"Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False\"\n",
" max_tokenized_len = max_length - 1\n",
" else:\n",
" max_tokenized_len = max_length\n",
"\n",
" encodings = tokenizer(\n",
" data,\n",
" add_special_tokens=False,\n",
" padding=True,\n",
" truncation=True if max_tokenized_len else False,\n",
" max_length=max_tokenized_len,\n",
" return_tensors=\"pt\",\n",
" return_attention_mask=True,\n",
" ).to(device)\n",
"\n",
" encoded_texts = encodings[\"input_ids\"]\n",
" attn_masks = encodings[\"attention_mask\"]\n",
"\n",
" # check that each input is long enough:\n",
" if add_start_token:\n",
" assert torch.all(torch.ge(attn_masks.sum(1), 1)), \"Each input text must be at least one token long.\"\n",
" else:\n",
" assert torch.all(\n",
" torch.ge(attn_masks.sum(1), 2)\n",
" ), \"When add_start_token=False, each input text must be at least two tokens long. Run with add_start_token=True if inputting strings of only one token, and remove all empty input strings.\"\n",
"\n",
" ppls = []\n",
" loss_fct = CrossEntropyLoss(reduction=\"none\")\n",
"\n",
" for start_index in logging.tqdm(range(0, len(encoded_texts), batch_size)):\n",
" end_index = min(start_index + batch_size, len(encoded_texts))\n",
" encoded_batch = encoded_texts[start_index:end_index]\n",
" attn_mask = attn_masks[start_index:end_index]\n",
"\n",
" if add_start_token:\n",
" bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0)).to(device)\n",
" encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)\n",
" attn_mask = torch.cat(\n",
" [torch.ones(bos_tokens_tensor.size(), dtype=torch.int64).to(device), attn_mask], dim=1\n",
" )\n",
"\n",
" labels = encoded_batch\n",
"\n",
" with torch.no_grad():\n",
" out_logits = model(encoded_batch, attention_mask=attn_mask).logits\n",
"\n",
" shift_logits = out_logits[..., :-1, :].contiguous()\n",
" shift_labels = labels[..., 1:].contiguous()\n",
" shift_attention_mask_batch = attn_mask[..., 1:].contiguous()\n",
"\n",
" perplexity_batch = torch.exp(\n",
" (loss_fct(shift_logits.transpose(1, 2), shift_labels) * shift_attention_mask_batch).sum(1)\n",
" / shift_attention_mask_batch.sum(1)\n",
" )\n",
"\n",
" ppls += perplexity_batch.tolist()\n",
"\n",
" return {\"perplexities\": ppls, \"mean_perplexity\": np.mean(ppls)}"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM, AutoConfig, PreTrainedTokenizerBase, PreTrainedTokenizer, GPTQConfig, BitsAndBytesConfig\n",
"\n",
"def load_model(model_name):\n",
" trust_remote_code = True\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code)\n",
" config = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote_code)\n",
" # print(config)\n",
" if config.quantization_config is not None:\n",
" config.quantization_config['disable_exllama'] = True\n",
" if 'use_exllama' in config.quantization_config:\n",
" del config.quantization_config['use_exllama']\n",
" model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=trust_remote_code, \n",
" config=config,\n",
" )\n",
" return model, tokenizer\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import gc\n",
"\n",
"def clear_mem():\n",
" torch.cuda.empty_cache()\n",
" gc.collect()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Results"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"models = [\n",
" \"TheBloke/phi-2-GPTQ\",\n",
" # \"TheBloke/Llama-2-7B-GPTQ\",\n",
" # \"TheBloke/Llama-2-13B-GPTQ\",\n",
" # \"TheBloke/Mistral-7B-v0.1-GPTQ\",\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"CUDA extension not installed.\n",
"CUDA extension not installed.\n",
"Using pad_token, but it is not set yet.\n",
"100%|██████████| 1/1 [00:00<00:00, 3.04it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.38it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ bad_ml 12.456705093383789 12.279064178466797\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 5.32it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 5.06it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ good_ml 22.6639461517334 22.733510971069336\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.33it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 3.34it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ sokal hoax 14.285429000854492 14.33104133605957\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.82it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.79it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ Theory o. general relativity 20.507640838623047 18.95397186279297\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 5.21it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 5.37it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ lorem ipsum 1.1642249822616577 1.8953758478164673\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.96it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.49it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ wikipedia on LK-99 18.05230140686035 16.92823028564453\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 5.16it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 5.03it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ I have a dream 2.8362326622009277 4.747076988220215\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.35it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.32it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ AI gen fake paper 7.09130334854126 7.853880405426025\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.18it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 3.83it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ Schmidhuber 2023 Subjective Novelty, Surprise 28.30998992919922 29.557880401611328\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.44it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.13it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ email_to_fauci 21.060319900512695 18.597585678100586\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 5.71it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 5.42it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ enron_email1 22.875591278076172 19.046457290649414\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 4.84it/s]\n",
"100%|██████████| 1/1 [00:00<00:00, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ openai_board_ann 8.552927017211914 7.572012901306152\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"summaries = {}\n",
"data = []\n",
"for model_name in models:\n",
" model, tokenizer = load_model(model_name)\n",
" for sample in samples:\n",
" if sample['name'] not in summaries:\n",
" summaries[sample['name']] = summize(sample['text'])[:600]\n",
" summary = summaries[sample['name']]\n",
"\n",
" # before \n",
" s1 = sample['text']\n",
" results = perplexity_compute(data=s1, model=model, tokenizer=tokenizer, device='cuda')\n",
" before = results['mean_perplexity']\n",
"\n",
" # after \n",
" s2 = f\"\"\"\n",
" High level summary: {summary}\n",
"\n",
"Text:\n",
"{sample['text']}\n",
" \"\"\"\n",
" results = perplexity_compute(data=s2, model=model, tokenizer=tokenizer, device='cuda')\n",
" after = np.array(results['perplexities'])[-len(s1):].mean()\n",
"\n",
" print(model_name, sample['name'], before, after)\n",
" data.append(dict(before=before, after=after, model=model_name, sample=sample['name'],\n",
" in_training=sample['in_training'], len=len(sample['text'])))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>before</th>\n",
" <th>after</th>\n",
" <th>model</th>\n",
" <th>in_training</th>\n",
" <th>len</th>\n",
" <th>summarizable%</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>bad_ml</th>\n",
" <td>12.456705</td>\n",
" <td>12.279064</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>2345</td>\n",
" <td>0.014261</td>\n",
" </tr>\n",
" <tr>\n",
" <th>good_ml</th>\n",
" <td>22.663946</td>\n",
" <td>22.733511</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>1004</td>\n",
" <td>-0.003069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>wikipedia on LK-99</th>\n",
" <td>18.052301</td>\n",
" <td>16.928230</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>1038</td>\n",
" <td>0.062267</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AI gen fake paper</th>\n",
" <td>7.091303</td>\n",
" <td>7.853880</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>2031</td>\n",
" <td>-0.107537</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Schmidhuber 2023 Subjective Novelty, Surprise</th>\n",
" <td>28.309990</td>\n",
" <td>29.557880</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>2654</td>\n",
" <td>-0.044080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>email_to_fauci</th>\n",
" <td>21.060320</td>\n",
" <td>18.597586</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>1559</td>\n",
" <td>0.116937</td>\n",
" </tr>\n",
" <tr>\n",
" <th>openai_board_ann</th>\n",
" <td>8.552927</td>\n",
" <td>7.572013</td>\n",
" <td>TheBloke/phi-2-GPTQ</td>\n",
" <td>False</td>\n",
" <td>1191</td>\n",
" <td>0.114688</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" before after \\\n",
"sample \n",
"bad_ml 12.456705 12.279064 \n",
"good_ml 22.663946 22.733511 \n",
"wikipedia on LK-99 18.052301 16.928230 \n",
"AI gen fake paper 7.091303 7.853880 \n",
"Schmidhuber 2023 Subjective Novelty, Surprise 28.309990 29.557880 \n",
"email_to_fauci 21.060320 18.597586 \n",
"openai_board_ann 8.552927 7.572013 \n",
"\n",
" model \\\n",
"sample \n",
"bad_ml TheBloke/phi-2-GPTQ \n",
"good_ml TheBloke/phi-2-GPTQ \n",
"wikipedia on LK-99 TheBloke/phi-2-GPTQ \n",
"AI gen fake paper TheBloke/phi-2-GPTQ \n",
"Schmidhuber 2023 Subjective Novelty, Surprise TheBloke/phi-2-GPTQ \n",
"email_to_fauci TheBloke/phi-2-GPTQ \n",
"openai_board_ann TheBloke/phi-2-GPTQ \n",
"\n",
" in_training len \\\n",
"sample \n",
"bad_ml False 2345 \n",
"good_ml False 1004 \n",
"wikipedia on LK-99 False 1038 \n",
"AI gen fake paper False 2031 \n",
"Schmidhuber 2023 Subjective Novelty, Surprise False 2654 \n",
"email_to_fauci False 1559 \n",
"openai_board_ann False 1191 \n",
"\n",
" summarizable% \n",
"sample \n",
"bad_ml 0.014261 \n",
"good_ml -0.003069 \n",
"wikipedia on LK-99 0.062267 \n",
"AI gen fake paper -0.107537 \n",
"Schmidhuber 2023 Subjective Novelty, Surprise -0.044080 \n",
"email_to_fauci 0.116937 \n",
"openai_board_ann 0.114688 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# results\n",
"df = pd.DataFrame(data).set_index('sample')\n",
"df = df.query('in_training == False')\n",
"df[\"summarizable%\"] = (df[\"before\"] - df[\"after\"])/df[\"before\"]\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TheBloke/phi-2-GPTQ\n",
"| sample | before | after | improvement | improvement% | suprising | summarizable |\n",
"|:----------------------------------------------|---------:|---------:|--------------:|---------------:|:------------|:---------------|\n",
"| email_to_fauci | 21.0603 | 18.5976 | 2.46273 | 0.116937 | True | True |\n",
"| wikipedia on LK-99 | 18.0523 | 16.9282 | 1.12407 | 0.0622675 | True | True |\n",
"| openai_board_ann | 8.55293 | 7.57201 | 0.980914 | 0.114688 | False | True |\n",
"| bad_ml | 12.4567 | 12.2791 | 0.177641 | 0.0142607 | False | True |\n",
"| good_ml | 22.6639 | 22.7335 | -0.0695648 | -0.0030694 | True | False |\n",
"| AI gen fake paper | 7.0913 | 7.85388 | -0.762577 | -0.107537 | False | False |\n",
"| Schmidhuber 2023 Subjective Novelty, Surprise | 28.31 | 29.5579 | -1.24789 | -0.0440795 | True | False |\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>before</th>\n",
" <th>after</th>\n",
" <th>improvement</th>\n",
" <th>improvement%</th>\n",
" <th>suprising</th>\n",
" <th>summarizable</th>\n",
" </tr>\n",
" <tr>\n",
" <th>sample</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>email_to_fauci</th>\n",
" <td>21.060320</td>\n",
" <td>18.597586</td>\n",
" <td>2.462734</td>\n",
" <td>0.116937</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>wikipedia on LK-99</th>\n",
" <td>18.052301</td>\n",
" <td>16.928230</td>\n",
" <td>1.124071</td>\n",
" <td>0.062267</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>openai_board_ann</th>\n",
" <td>8.552927</td>\n",
" <td>7.572013</td>\n",
" <td>0.980914</td>\n",
" <td>0.114688</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>bad_ml</th>\n",
" <td>12.456705</td>\n",
" <td>12.279064</td>\n",
" <td>0.177641</td>\n",
" <td>0.014261</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>good_ml</th>\n",
" <td>22.663946</td>\n",
" <td>22.733511</td>\n",
" <td>-0.069565</td>\n",
" <td>-0.003069</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AI gen fake paper</th>\n",
" <td>7.091303</td>\n",
" <td>7.853880</td>\n",
" <td>-0.762577</td>\n",
" <td>-0.107537</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Schmidhuber 2023 Subjective Novelty, Surprise</th>\n",
" <td>28.309990</td>\n",
" <td>29.557880</td>\n",
" <td>-1.247890</td>\n",
" <td>-0.044080</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" before after \\\n",
"sample \n",
"email_to_fauci 21.060320 18.597586 \n",
"wikipedia on LK-99 18.052301 16.928230 \n",
"openai_board_ann 8.552927 7.572013 \n",
"bad_ml 12.456705 12.279064 \n",
"good_ml 22.663946 22.733511 \n",
"AI gen fake paper 7.091303 7.853880 \n",
"Schmidhuber 2023 Subjective Novelty, Surprise 28.309990 29.557880 \n",
"\n",
" improvement improvement% \\\n",
"sample \n",
"email_to_fauci 2.462734 0.116937 \n",
"wikipedia on LK-99 1.124071 0.062267 \n",
"openai_board_ann 0.980914 0.114688 \n",
"bad_ml 0.177641 0.014261 \n",
"good_ml -0.069565 -0.003069 \n",
"AI gen fake paper -0.762577 -0.107537 \n",
"Schmidhuber 2023 Subjective Novelty, Surprise -1.247890 -0.044080 \n",
"\n",
" suprising summarizable \n",
"sample \n",
"email_to_fauci True True \n",
"wikipedia on LK-99 True True \n",
"openai_board_ann False True \n",
"bad_ml False True \n",
"good_ml True False \n",
"AI gen fake paper False False \n",
"Schmidhuber 2023 Subjective Novelty, Surprise True False "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df[\"improvement%\"] = (df[\"before\"] - df[\"after\"]) / df[\"before\"] \n",
"df[\"improvement\"] = (df[\"before\"] - df[\"after\"]) \n",
"df[\"summarizable\"] = df[\"improvement\"] > .0\n",
"df['suprising'] = df[\"before\"] > 15\n",
"df['BS'] = ~df[\"summarizable\"] | ~df['suprising']\n",
"for n,d in df.groupby(\"model\"):\n",
" print(n)\n",
" d = d[[ 'before', 'after', \"improvement\", \"improvement%\", 'suprising', 'summarizable', ]].sort_values(\"improvement\", ascending=False)\n",
" print(d.to_markdown())\n",
" display(d)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0rc1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}