diff --git a/.gitignore b/.gitignore
index 30bd623..9ef2685 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,192 @@
.env
+
+*.arrow
+squad_*
+*sbert_embedded*
+*.pkl
+ckpts*
+.deepspeed_env
+*.jsonl
+*tar.gz
+ckpts**
+wandb
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+
+# vs code
+.vscode
+*.bin
+
+.DS_Store
+
+# gpt4all-chat
+CMakeLists.txt.user
+gpt4all-chat/models/*
+build_*
+build-*
+
+# IntelliJ
+.idea/
+
+# LLM models
+*.gguf
diff --git a/nbs/01_use_tldr_prompt.ipynb b/nbs/01_use_tldr_prompt.ipynb
index 81f177b..c37bea5 100644
--- a/nbs/01_use_tldr_prompt.ipynb
+++ b/nbs/01_use_tldr_prompt.ipynb
@@ -683,6 +683,376 @@
"text": [
"TheBloke/Llama-2-7B-GPTQ openai_board_ann 5.916965007781982 5.880436897277832\n"
]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using pad_token, but it is not set yet.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.44it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.35it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ bad_ml 7.593435764312744 7.552160739898682\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.79it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.60it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ good_ml 12.493735313415527 11.74043083190918\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.34it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.33it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ sokal hoax 3.6413912773132324 4.23477840423584\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.58it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.56it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ Theory o. general relativity 11.865456581115723 12.391860008239746\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.84it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.73it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ lorem ipsum 1.1234644651412964 2.4330925941467285\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.72it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.57it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ wikipedia on LK-99 11.651829719543457 9.702957153320312\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.79it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.58it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ I have a dream 1.9503285884857178 2.886058807373047\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.46it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.45it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ AI gen fake paper 5.545047283172607 5.438870429992676\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.31it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.31it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ Schmidhuber 2023 Subjective Novelty, Surprise 12.74594497680664 12.751182556152344\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.47it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.48it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ email_to_fauci 9.83792495727539 9.111186981201172\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.06it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.88it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ enron_email1 12.423323631286621 10.992777824401855\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.56it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.51it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ openai_board_ann 5.368657112121582 5.724536418914795\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "Using pad_token, but it is not set yet.\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.85it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.92it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ bad_ml 8.447798728942871 8.512030601501465\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.41it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.09it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ good_ml 15.270345687866211 14.024930000305176\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.91it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.90it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ sokal hoax 5.615131855010986 5.96171236038208\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.09it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.13it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ Theory o. general relativity 12.043559074401855 13.062692642211914\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.53it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.44it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ lorem ipsum 1.1297272443771362 2.379859685897827\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.89it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.14it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ wikipedia on LK-99 11.517416954040527 10.170454025268555\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.42it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.37it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ I have a dream 1.8666073083877563 3.1792848110198975\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.03it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.99it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ AI gen fake paper 5.661380290985107 5.60957145690918\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 1.88it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 1.89it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ Schmidhuber 2023 Subjective Novelty, Surprise 13.881444931030273 13.93079948425293\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.06it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.03it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ email_to_fauci 10.894938468933105 10.550251960754395\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.60it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.51it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ enron_email1 12.993982315063477 11.092223167419434\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 1/1 [00:00<00:00, 2.10it/s]\n",
+ "100%|██████████| 1/1 [00:00<00:00, 2.16it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ openai_board_ann 5.521510124206543 6.3877854347229\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
}
],
"source": [
@@ -711,7 +1081,8 @@
" after = np.array(results['perplexities'])[-len(s1):].mean()\n",
"\n",
" print(model_name, sample['name'], before, after)\n",
- " data.append(dict(before=before, after=after, model=model_name, sample=sample['name']))\n"
+ " data.append(dict(before=before, after=after, model=model_name, sample=sample['name'],\n",
+ " in_training=sample['in_training'], len=sample['len']))\n"
]
},
{
@@ -719,27 +1090,1181 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "results"
- ]
+ "source": []
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# results\n",
"df = pd.DataFrame(data)\n",
"df[\"learning%\"] = (df[\"before\"] - df[\"after\"])/df[\"before\"]\n",
+ "df['in_training'] = None\n",
"# df"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " before | \n",
+ " after | \n",
+ " model | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 12.456705 | \n",
+ " 11.446499 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " bad_ml | \n",
+ " 0.081097 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 22.663946 | \n",
+ " 20.115414 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " good_ml | \n",
+ " 0.112449 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 14.285429 | \n",
+ " 14.216052 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " sokal hoax | \n",
+ " 0.004856 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 20.507641 | \n",
+ " 19.644333 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " Theory o. general relativity | \n",
+ " 0.042097 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1.164225 | \n",
+ " 2.354216 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " lorem ipsum | \n",
+ " -1.022131 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 18.052301 | \n",
+ " 14.710425 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.185122 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2.836233 | \n",
+ " 4.256137 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " I have a dream | \n",
+ " -0.500631 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 7.091303 | \n",
+ " 7.495458 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " AI gen fake paper | \n",
+ " -0.056993 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 28.309990 | \n",
+ " 27.353872 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.033773 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 21.060320 | \n",
+ " 18.637592 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " email_to_fauci | \n",
+ " 0.115038 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 22.875591 | \n",
+ " 19.709356 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " enron_email1 | \n",
+ " 0.138411 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 8.552927 | \n",
+ " 9.061806 | \n",
+ " TheBloke/phi-2-GPTQ | \n",
+ " openai_board_ann | \n",
+ " -0.059498 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 7.999119 | \n",
+ " 7.641711 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " bad_ml | \n",
+ " 0.044681 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 14.435519 | \n",
+ " 13.192563 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " good_ml | \n",
+ " 0.086104 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " 7.427894 | \n",
+ " 7.882517 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " sokal hoax | \n",
+ " -0.061205 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " 13.795382 | \n",
+ " 13.612863 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " Theory o. general relativity | \n",
+ " 0.013230 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " 1.243477 | \n",
+ " 2.416914 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " lorem ipsum | \n",
+ " -0.943674 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 12.341974 | \n",
+ " 9.843638 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.202426 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " 2.268300 | \n",
+ " 3.098978 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " I have a dream | \n",
+ " -0.366212 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " 6.164851 | \n",
+ " 6.050374 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " AI gen fake paper | \n",
+ " 0.018569 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " 13.611172 | \n",
+ " 13.453000 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.011621 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " 11.317076 | \n",
+ " 10.221867 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " email_to_fauci | \n",
+ " 0.096775 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " 13.783302 | \n",
+ " 11.154099 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " enron_email1 | \n",
+ " 0.190753 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " 5.916965 | \n",
+ " 5.880437 | \n",
+ " TheBloke/Llama-2-7B-GPTQ | \n",
+ " openai_board_ann | \n",
+ " 0.006173 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " 7.593436 | \n",
+ " 7.552161 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " bad_ml | \n",
+ " 0.005436 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " 12.493735 | \n",
+ " 11.740431 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " good_ml | \n",
+ " 0.060295 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " 3.641391 | \n",
+ " 4.234778 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " sokal hoax | \n",
+ " -0.162956 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " 11.865457 | \n",
+ " 12.391860 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " Theory o. general relativity | \n",
+ " -0.044364 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " 1.123464 | \n",
+ " 2.433093 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " lorem ipsum | \n",
+ " -1.165705 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " 11.651830 | \n",
+ " 9.702957 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.167259 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " 1.950329 | \n",
+ " 2.886059 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " I have a dream | \n",
+ " -0.479781 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " 5.545047 | \n",
+ " 5.438870 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " AI gen fake paper | \n",
+ " 0.019148 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " 12.745945 | \n",
+ " 12.751183 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.000411 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " 9.837925 | \n",
+ " 9.111187 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " email_to_fauci | \n",
+ " 0.073871 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " 12.423324 | \n",
+ " 10.992778 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " enron_email1 | \n",
+ " 0.115150 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " 5.368657 | \n",
+ " 5.724536 | \n",
+ " TheBloke/Llama-2-13B-GPTQ | \n",
+ " openai_board_ann | \n",
+ " -0.066288 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " 8.447799 | \n",
+ " 8.512031 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " bad_ml | \n",
+ " -0.007603 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " 15.270346 | \n",
+ " 14.024930 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " good_ml | \n",
+ " 0.081558 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " 5.615132 | \n",
+ " 5.961712 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " sokal hoax | \n",
+ " -0.061723 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " 12.043559 | \n",
+ " 13.062693 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " Theory o. general relativity | \n",
+ " -0.084621 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " 1.129727 | \n",
+ " 2.379860 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " lorem ipsum | \n",
+ " -1.106579 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " 11.517417 | \n",
+ " 10.170454 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.116950 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " 1.866607 | \n",
+ " 3.179285 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " I have a dream | \n",
+ " -0.703242 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " 5.661380 | \n",
+ " 5.609571 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " AI gen fake paper | \n",
+ " 0.009151 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " 13.881445 | \n",
+ " 13.930799 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.003555 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 45 | \n",
+ " 10.894938 | \n",
+ " 10.550252 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " email_to_fauci | \n",
+ " 0.031637 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 46 | \n",
+ " 12.993982 | \n",
+ " 11.092223 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " enron_email1 | \n",
+ " 0.146357 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " 5.521510 | \n",
+ " 6.387785 | \n",
+ " TheBloke/Mistral-7B-v0.1-GPTQ | \n",
+ " openai_board_ann | \n",
+ " -0.156891 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " before after model \\\n",
+ "0 12.456705 11.446499 TheBloke/phi-2-GPTQ \n",
+ "1 22.663946 20.115414 TheBloke/phi-2-GPTQ \n",
+ "2 14.285429 14.216052 TheBloke/phi-2-GPTQ \n",
+ "3 20.507641 19.644333 TheBloke/phi-2-GPTQ \n",
+ "4 1.164225 2.354216 TheBloke/phi-2-GPTQ \n",
+ "5 18.052301 14.710425 TheBloke/phi-2-GPTQ \n",
+ "6 2.836233 4.256137 TheBloke/phi-2-GPTQ \n",
+ "7 7.091303 7.495458 TheBloke/phi-2-GPTQ \n",
+ "8 28.309990 27.353872 TheBloke/phi-2-GPTQ \n",
+ "9 21.060320 18.637592 TheBloke/phi-2-GPTQ \n",
+ "10 22.875591 19.709356 TheBloke/phi-2-GPTQ \n",
+ "11 8.552927 9.061806 TheBloke/phi-2-GPTQ \n",
+ "12 7.999119 7.641711 TheBloke/Llama-2-7B-GPTQ \n",
+ "13 14.435519 13.192563 TheBloke/Llama-2-7B-GPTQ \n",
+ "14 7.427894 7.882517 TheBloke/Llama-2-7B-GPTQ \n",
+ "15 13.795382 13.612863 TheBloke/Llama-2-7B-GPTQ \n",
+ "16 1.243477 2.416914 TheBloke/Llama-2-7B-GPTQ \n",
+ "17 12.341974 9.843638 TheBloke/Llama-2-7B-GPTQ \n",
+ "18 2.268300 3.098978 TheBloke/Llama-2-7B-GPTQ \n",
+ "19 6.164851 6.050374 TheBloke/Llama-2-7B-GPTQ \n",
+ "20 13.611172 13.453000 TheBloke/Llama-2-7B-GPTQ \n",
+ "21 11.317076 10.221867 TheBloke/Llama-2-7B-GPTQ \n",
+ "22 13.783302 11.154099 TheBloke/Llama-2-7B-GPTQ \n",
+ "23 5.916965 5.880437 TheBloke/Llama-2-7B-GPTQ \n",
+ "24 7.593436 7.552161 TheBloke/Llama-2-13B-GPTQ \n",
+ "25 12.493735 11.740431 TheBloke/Llama-2-13B-GPTQ \n",
+ "26 3.641391 4.234778 TheBloke/Llama-2-13B-GPTQ \n",
+ "27 11.865457 12.391860 TheBloke/Llama-2-13B-GPTQ \n",
+ "28 1.123464 2.433093 TheBloke/Llama-2-13B-GPTQ \n",
+ "29 11.651830 9.702957 TheBloke/Llama-2-13B-GPTQ \n",
+ "30 1.950329 2.886059 TheBloke/Llama-2-13B-GPTQ \n",
+ "31 5.545047 5.438870 TheBloke/Llama-2-13B-GPTQ \n",
+ "32 12.745945 12.751183 TheBloke/Llama-2-13B-GPTQ \n",
+ "33 9.837925 9.111187 TheBloke/Llama-2-13B-GPTQ \n",
+ "34 12.423324 10.992778 TheBloke/Llama-2-13B-GPTQ \n",
+ "35 5.368657 5.724536 TheBloke/Llama-2-13B-GPTQ \n",
+ "36 8.447799 8.512031 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "37 15.270346 14.024930 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "38 5.615132 5.961712 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "39 12.043559 13.062693 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "40 1.129727 2.379860 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "41 11.517417 10.170454 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "42 1.866607 3.179285 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "43 5.661380 5.609571 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "44 13.881445 13.930799 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "45 10.894938 10.550252 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "46 12.993982 11.092223 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "47 5.521510 6.387785 TheBloke/Mistral-7B-v0.1-GPTQ \n",
+ "\n",
+ " sample learning% in_training \n",
+ "0 bad_ml 0.081097 None \n",
+ "1 good_ml 0.112449 None \n",
+ "2 sokal hoax 0.004856 None \n",
+ "3 Theory o. general relativity 0.042097 None \n",
+ "4 lorem ipsum -1.022131 None \n",
+ "5 wikipedia on LK-99 0.185122 None \n",
+ "6 I have a dream -0.500631 None \n",
+ "7 AI gen fake paper -0.056993 None \n",
+ "8 Schmidhuber 2023 Subjective Novelty, Surprise 0.033773 None \n",
+ "9 email_to_fauci 0.115038 None \n",
+ "10 enron_email1 0.138411 None \n",
+ "11 openai_board_ann -0.059498 None \n",
+ "12 bad_ml 0.044681 None \n",
+ "13 good_ml 0.086104 None \n",
+ "14 sokal hoax -0.061205 None \n",
+ "15 Theory o. general relativity 0.013230 None \n",
+ "16 lorem ipsum -0.943674 None \n",
+ "17 wikipedia on LK-99 0.202426 None \n",
+ "18 I have a dream -0.366212 None \n",
+ "19 AI gen fake paper 0.018569 None \n",
+ "20 Schmidhuber 2023 Subjective Novelty, Surprise 0.011621 None \n",
+ "21 email_to_fauci 0.096775 None \n",
+ "22 enron_email1 0.190753 None \n",
+ "23 openai_board_ann 0.006173 None \n",
+ "24 bad_ml 0.005436 None \n",
+ "25 good_ml 0.060295 None \n",
+ "26 sokal hoax -0.162956 None \n",
+ "27 Theory o. general relativity -0.044364 None \n",
+ "28 lorem ipsum -1.165705 None \n",
+ "29 wikipedia on LK-99 0.167259 None \n",
+ "30 I have a dream -0.479781 None \n",
+ "31 AI gen fake paper 0.019148 None \n",
+ "32 Schmidhuber 2023 Subjective Novelty, Surprise -0.000411 None \n",
+ "33 email_to_fauci 0.073871 None \n",
+ "34 enron_email1 0.115150 None \n",
+ "35 openai_board_ann -0.066288 None \n",
+ "36 bad_ml -0.007603 None \n",
+ "37 good_ml 0.081558 None \n",
+ "38 sokal hoax -0.061723 None \n",
+ "39 Theory o. general relativity -0.084621 None \n",
+ "40 lorem ipsum -1.106579 None \n",
+ "41 wikipedia on LK-99 0.116950 None \n",
+ "42 I have a dream -0.703242 None \n",
+ "43 AI gen fake paper 0.009151 None \n",
+ "44 Schmidhuber 2023 Subjective Novelty, Surprise -0.003555 None \n",
+ "45 email_to_fauci 0.031637 None \n",
+ "46 enron_email1 0.146357 None \n",
+ "47 openai_board_ann -0.156891 None "
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|-------------:|:--------------|\n",
+ "| 29 | wikipedia on LK-99 | 0.167259 | |\n",
+ "| 34 | enron_email1 | 0.11515 | |\n",
+ "| 33 | email_to_fauci | 0.0738711 | |\n",
+ "| 25 | good_ml | 0.0602946 | |\n",
+ "| 31 | AI gen fake paper | 0.0191481 | |\n",
+ "| 24 | bad_ml | 0.00543562 | |\n",
+ "| 32 | Schmidhuber 2023 Subjective Novelty, Surprise | -0.000410921 | |\n",
+ "| 27 | Theory o. general relativity | -0.0443644 | |\n",
+ "| 35 | openai_board_ann | -0.0662883 | |\n",
+ "| 26 | sokal hoax | -0.162956 | |\n",
+ "| 30 | I have a dream | -0.479781 | |\n",
+ "| 28 | lorem ipsum | -1.1657 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 29 | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.167259 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " enron_email1 | \n",
+ " 0.115150 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " email_to_fauci | \n",
+ " 0.073871 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " good_ml | \n",
+ " 0.060295 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " AI gen fake paper | \n",
+ " 0.019148 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " bad_ml | \n",
+ " 0.005436 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.000411 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " Theory o. general relativity | \n",
+ " -0.044364 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " openai_board_ann | \n",
+ " -0.066288 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " sokal hoax | \n",
+ " -0.162956 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " I have a dream | \n",
+ " -0.479781 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " lorem ipsum | \n",
+ " -1.165705 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "29 wikipedia on LK-99 0.167259 None\n",
+ "34 enron_email1 0.115150 None\n",
+ "33 email_to_fauci 0.073871 None\n",
+ "25 good_ml 0.060295 None\n",
+ "31 AI gen fake paper 0.019148 None\n",
+ "24 bad_ml 0.005436 None\n",
+ "32 Schmidhuber 2023 Subjective Novelty, Surprise -0.000411 None\n",
+ "27 Theory o. general relativity -0.044364 None\n",
+ "35 openai_board_ann -0.066288 None\n",
+ "26 sokal hoax -0.162956 None\n",
+ "30 I have a dream -0.479781 None\n",
+ "28 lorem ipsum -1.165705 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-7B-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 17 | wikipedia on LK-99 | 0.202426 | |\n",
+ "| 22 | enron_email1 | 0.190753 | |\n",
+ "| 21 | email_to_fauci | 0.0967749 | |\n",
+ "| 13 | good_ml | 0.086104 | |\n",
+ "| 12 | bad_ml | 0.0446809 | |\n",
+ "| 19 | AI gen fake paper | 0.0185693 | |\n",
+ "| 15 | Theory o. general relativity | 0.0132304 | |\n",
+ "| 20 | Schmidhuber 2023 Subjective Novelty, Surprise | 0.0116207 | |\n",
+ "| 23 | openai_board_ann | 0.00617345 | |\n",
+ "| 14 | sokal hoax | -0.0612049 | |\n",
+ "| 18 | I have a dream | -0.366212 | |\n",
+ "| 16 | lorem ipsum | -0.943674 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 17 | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.202426 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " enron_email1 | \n",
+ " 0.190753 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " email_to_fauci | \n",
+ " 0.096775 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " good_ml | \n",
+ " 0.086104 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " bad_ml | \n",
+ " 0.044681 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " AI gen fake paper | \n",
+ " 0.018569 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " Theory o. general relativity | \n",
+ " 0.013230 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.011621 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " openai_board_ann | \n",
+ " 0.006173 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " sokal hoax | \n",
+ " -0.061205 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " I have a dream | \n",
+ " -0.366212 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " lorem ipsum | \n",
+ " -0.943674 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "17 wikipedia on LK-99 0.202426 None\n",
+ "22 enron_email1 0.190753 None\n",
+ "21 email_to_fauci 0.096775 None\n",
+ "13 good_ml 0.086104 None\n",
+ "12 bad_ml 0.044681 None\n",
+ "19 AI gen fake paper 0.018569 None\n",
+ "15 Theory o. general relativity 0.013230 None\n",
+ "20 Schmidhuber 2023 Subjective Novelty, Surprise 0.011621 None\n",
+ "23 openai_board_ann 0.006173 None\n",
+ "14 sokal hoax -0.061205 None\n",
+ "18 I have a dream -0.366212 None\n",
+ "16 lorem ipsum -0.943674 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 46 | enron_email1 | 0.146357 | |\n",
+ "| 41 | wikipedia on LK-99 | 0.11695 | |\n",
+ "| 37 | good_ml | 0.0815578 | |\n",
+ "| 45 | email_to_fauci | 0.0316373 | |\n",
+ "| 43 | AI gen fake paper | 0.00915127 | |\n",
+ "| 44 | Schmidhuber 2023 Subjective Novelty, Surprise | -0.00355543 | |\n",
+ "| 36 | bad_ml | -0.00760339 | |\n",
+ "| 38 | sokal hoax | -0.0617226 | |\n",
+ "| 39 | Theory o. general relativity | -0.0846206 | |\n",
+ "| 47 | openai_board_ann | -0.156891 | |\n",
+ "| 42 | I have a dream | -0.703242 | |\n",
+ "| 40 | lorem ipsum | -1.10658 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 46 | \n",
+ " enron_email1 | \n",
+ " 0.146357 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.116950 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " good_ml | \n",
+ " 0.081558 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 45 | \n",
+ " email_to_fauci | \n",
+ " 0.031637 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " AI gen fake paper | \n",
+ " 0.009151 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.003555 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " bad_ml | \n",
+ " -0.007603 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " sokal hoax | \n",
+ " -0.061723 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " Theory o. general relativity | \n",
+ " -0.084621 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " openai_board_ann | \n",
+ " -0.156891 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " I have a dream | \n",
+ " -0.703242 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " lorem ipsum | \n",
+ " -1.106579 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "46 enron_email1 0.146357 None\n",
+ "41 wikipedia on LK-99 0.116950 None\n",
+ "37 good_ml 0.081558 None\n",
+ "45 email_to_fauci 0.031637 None\n",
+ "43 AI gen fake paper 0.009151 None\n",
+ "44 Schmidhuber 2023 Subjective Novelty, Surprise -0.003555 None\n",
+ "36 bad_ml -0.007603 None\n",
+ "38 sokal hoax -0.061723 None\n",
+ "39 Theory o. general relativity -0.084621 None\n",
+ "47 openai_board_ann -0.156891 None\n",
+ "42 I have a dream -0.703242 None\n",
+ "40 lorem ipsum -1.106579 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/phi-2-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 5 | wikipedia on LK-99 | 0.185122 | |\n",
+ "| 10 | enron_email1 | 0.138411 | |\n",
+ "| 9 | email_to_fauci | 0.115038 | |\n",
+ "| 1 | good_ml | 0.112449 | |\n",
+ "| 0 | bad_ml | 0.0810974 | |\n",
+ "| 3 | Theory o. general relativity | 0.0420969 | |\n",
+ "| 8 | Schmidhuber 2023 Subjective Novelty, Surprise | 0.0337732 | |\n",
+ "| 2 | sokal hoax | 0.00485648 | |\n",
+ "| 7 | AI gen fake paper | -0.056993 | |\n",
+ "| 11 | openai_board_ann | -0.0594976 | |\n",
+ "| 6 | I have a dream | -0.500631 | |\n",
+ "| 4 | lorem ipsum | -1.02213 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5 | \n",
+ " wikipedia on LK-99 | \n",
+ " 0.185122 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " enron_email1 | \n",
+ " 0.138411 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " email_to_fauci | \n",
+ " 0.115038 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " good_ml | \n",
+ " 0.112449 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " bad_ml | \n",
+ " 0.081097 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Theory o. general relativity | \n",
+ " 0.042097 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.033773 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " sokal hoax | \n",
+ " 0.004856 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " AI gen fake paper | \n",
+ " -0.056993 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " openai_board_ann | \n",
+ " -0.059498 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " I have a dream | \n",
+ " -0.500631 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " lorem ipsum | \n",
+ " -1.022131 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "5 wikipedia on LK-99 0.185122 None\n",
+ "10 enron_email1 0.138411 None\n",
+ "9 email_to_fauci 0.115038 None\n",
+ "1 good_ml 0.112449 None\n",
+ "0 bad_ml 0.081097 None\n",
+ "3 Theory o. general relativity 0.042097 None\n",
+ "8 Schmidhuber 2023 Subjective Novelty, Surprise 0.033773 None\n",
+ "2 sokal hoax 0.004856 None\n",
+ "7 AI gen fake paper -0.056993 None\n",
+ "11 openai_board_ann -0.059498 None\n",
+ "6 I have a dream -0.500631 None\n",
+ "4 lorem ipsum -1.022131 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"for n,d in df.groupby(\"model\"):\n",
" print(n)\n",
@@ -750,12 +2275,784 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-13B-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 29 | wikipedia on LK-99 | 1.94887 | |\n",
+ "| 34 | enron_email1 | 1.43055 | |\n",
+ "| 25 | good_ml | 0.753304 | |\n",
+ "| 33 | email_to_fauci | 0.726738 | |\n",
+ "| 31 | AI gen fake paper | 0.106177 | |\n",
+ "| 24 | bad_ml | 0.041275 | |\n",
+ "| 32 | Schmidhuber 2023 Subjective Novelty, Surprise | -0.00523758 | |\n",
+ "| 35 | openai_board_ann | -0.355879 | |\n",
+ "| 27 | Theory o. general relativity | -0.526403 | |\n",
+ "| 26 | sokal hoax | -0.593387 | |\n",
+ "| 30 | I have a dream | -0.93573 | |\n",
+ "| 28 | lorem ipsum | -1.30963 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 29 | \n",
+ " wikipedia on LK-99 | \n",
+ " 1.948873 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " enron_email1 | \n",
+ " 1.430546 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " good_ml | \n",
+ " 0.753304 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " email_to_fauci | \n",
+ " 0.726738 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " AI gen fake paper | \n",
+ " 0.106177 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " bad_ml | \n",
+ " 0.041275 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.005238 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 35 | \n",
+ " openai_board_ann | \n",
+ " -0.355879 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " Theory o. general relativity | \n",
+ " -0.526403 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " sokal hoax | \n",
+ " -0.593387 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " I have a dream | \n",
+ " -0.935730 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " lorem ipsum | \n",
+ " -1.309628 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "29 wikipedia on LK-99 1.948873 None\n",
+ "34 enron_email1 1.430546 None\n",
+ "25 good_ml 0.753304 None\n",
+ "33 email_to_fauci 0.726738 None\n",
+ "31 AI gen fake paper 0.106177 None\n",
+ "24 bad_ml 0.041275 None\n",
+ "32 Schmidhuber 2023 Subjective Novelty, Surprise -0.005238 None\n",
+ "35 openai_board_ann -0.355879 None\n",
+ "27 Theory o. general relativity -0.526403 None\n",
+ "26 sokal hoax -0.593387 None\n",
+ "30 I have a dream -0.935730 None\n",
+ "28 lorem ipsum -1.309628 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Llama-2-7B-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 22 | enron_email1 | 2.6292 | |\n",
+ "| 17 | wikipedia on LK-99 | 2.49834 | |\n",
+ "| 13 | good_ml | 1.24296 | |\n",
+ "| 21 | email_to_fauci | 1.09521 | |\n",
+ "| 12 | bad_ml | 0.357408 | |\n",
+ "| 15 | Theory o. general relativity | 0.182519 | |\n",
+ "| 20 | Schmidhuber 2023 Subjective Novelty, Surprise | 0.158172 | |\n",
+ "| 19 | AI gen fake paper | 0.114477 | |\n",
+ "| 23 | openai_board_ann | 0.0365281 | |\n",
+ "| 14 | sokal hoax | -0.454623 | |\n",
+ "| 18 | I have a dream | -0.830678 | |\n",
+ "| 16 | lorem ipsum | -1.17344 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 22 | \n",
+ " enron_email1 | \n",
+ " 2.629203 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " wikipedia on LK-99 | \n",
+ " 2.498336 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " good_ml | \n",
+ " 1.242956 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " email_to_fauci | \n",
+ " 1.095209 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " bad_ml | \n",
+ " 0.357408 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " Theory o. general relativity | \n",
+ " 0.182519 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.158172 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " AI gen fake paper | \n",
+ " 0.114477 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " openai_board_ann | \n",
+ " 0.036528 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " sokal hoax | \n",
+ " -0.454623 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " I have a dream | \n",
+ " -0.830678 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " lorem ipsum | \n",
+ " -1.173437 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "22 enron_email1 2.629203 None\n",
+ "17 wikipedia on LK-99 2.498336 None\n",
+ "13 good_ml 1.242956 None\n",
+ "21 email_to_fauci 1.095209 None\n",
+ "12 bad_ml 0.357408 None\n",
+ "15 Theory o. general relativity 0.182519 None\n",
+ "20 Schmidhuber 2023 Subjective Novelty, Surprise 0.158172 None\n",
+ "19 AI gen fake paper 0.114477 None\n",
+ "23 openai_board_ann 0.036528 None\n",
+ "14 sokal hoax -0.454623 None\n",
+ "18 I have a dream -0.830678 None\n",
+ "16 lorem ipsum -1.173437 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/Mistral-7B-v0.1-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 46 | enron_email1 | 1.90176 | |\n",
+ "| 41 | wikipedia on LK-99 | 1.34696 | |\n",
+ "| 37 | good_ml | 1.24542 | |\n",
+ "| 45 | email_to_fauci | 0.344687 | |\n",
+ "| 43 | AI gen fake paper | 0.0518088 | |\n",
+ "| 44 | Schmidhuber 2023 Subjective Novelty, Surprise | -0.0493546 | |\n",
+ "| 36 | bad_ml | -0.0642319 | |\n",
+ "| 38 | sokal hoax | -0.346581 | |\n",
+ "| 47 | openai_board_ann | -0.866275 | |\n",
+ "| 39 | Theory o. general relativity | -1.01913 | |\n",
+ "| 40 | lorem ipsum | -1.25013 | |\n",
+ "| 42 | I have a dream | -1.31268 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 46 | \n",
+ " enron_email1 | \n",
+ " 1.901759 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 41 | \n",
+ " wikipedia on LK-99 | \n",
+ " 1.346963 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 37 | \n",
+ " good_ml | \n",
+ " 1.245416 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 45 | \n",
+ " email_to_fauci | \n",
+ " 0.344687 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 43 | \n",
+ " AI gen fake paper | \n",
+ " 0.051809 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 44 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " -0.049355 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 36 | \n",
+ " bad_ml | \n",
+ " -0.064232 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 38 | \n",
+ " sokal hoax | \n",
+ " -0.346581 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 47 | \n",
+ " openai_board_ann | \n",
+ " -0.866275 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 39 | \n",
+ " Theory o. general relativity | \n",
+ " -1.019134 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 40 | \n",
+ " lorem ipsum | \n",
+ " -1.250132 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 42 | \n",
+ " I have a dream | \n",
+ " -1.312678 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "46 enron_email1 1.901759 None\n",
+ "41 wikipedia on LK-99 1.346963 None\n",
+ "37 good_ml 1.245416 None\n",
+ "45 email_to_fauci 0.344687 None\n",
+ "43 AI gen fake paper 0.051809 None\n",
+ "44 Schmidhuber 2023 Subjective Novelty, Surprise -0.049355 None\n",
+ "36 bad_ml -0.064232 None\n",
+ "38 sokal hoax -0.346581 None\n",
+ "47 openai_board_ann -0.866275 None\n",
+ "39 Theory o. general relativity -1.019134 None\n",
+ "40 lorem ipsum -1.250132 None\n",
+ "42 I have a dream -1.312678 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TheBloke/phi-2-GPTQ\n",
+ "| | sample | learning% | in_training |\n",
+ "|---:|:----------------------------------------------|------------:|:--------------|\n",
+ "| 5 | wikipedia on LK-99 | 3.34188 | |\n",
+ "| 10 | enron_email1 | 3.16623 | |\n",
+ "| 1 | good_ml | 2.54853 | |\n",
+ "| 9 | email_to_fauci | 2.42273 | |\n",
+ "| 0 | bad_ml | 1.01021 | |\n",
+ "| 8 | Schmidhuber 2023 Subjective Novelty, Surprise | 0.956118 | |\n",
+ "| 3 | Theory o. general relativity | 0.863308 | |\n",
+ "| 2 | sokal hoax | 0.0693769 | |\n",
+ "| 7 | AI gen fake paper | -0.404154 | |\n",
+ "| 11 | openai_board_ann | -0.508879 | |\n",
+ "| 4 | lorem ipsum | -1.18999 | |\n",
+ "| 6 | I have a dream | -1.4199 | |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " learning% | \n",
+ " in_training | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5 | \n",
+ " wikipedia on LK-99 | \n",
+ " 3.341876 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " enron_email1 | \n",
+ " 3.166235 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " good_ml | \n",
+ " 2.548532 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " email_to_fauci | \n",
+ " 2.422728 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 0 | \n",
+ " bad_ml | \n",
+ " 1.010206 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.956118 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Theory o. general relativity | \n",
+ " 0.863308 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " sokal hoax | \n",
+ " 0.069377 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " AI gen fake paper | \n",
+ " -0.404154 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " openai_board_ann | \n",
+ " -0.508879 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " lorem ipsum | \n",
+ " -1.189991 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " I have a dream | \n",
+ " -1.419905 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample learning% in_training\n",
+ "5 wikipedia on LK-99 3.341876 None\n",
+ "10 enron_email1 3.166235 None\n",
+ "1 good_ml 2.548532 None\n",
+ "9 email_to_fauci 2.422728 None\n",
+ "0 bad_ml 1.010206 None\n",
+ "8 Schmidhuber 2023 Subjective Novelty, Surprise 0.956118 None\n",
+ "3 Theory o. general relativity 0.863308 None\n",
+ "2 sokal hoax 0.069377 None\n",
+ "7 AI gen fake paper -0.404154 None\n",
+ "11 openai_board_ann -0.508879 None\n",
+ "4 lorem ipsum -1.189991 None\n",
+ "6 I have a dream -1.419905 None"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df[\"learning%\"] = (df[\"before\"] - df[\"after\"])\n",
+ "for n,d in df.groupby(\"model\"):\n",
+ " print(n)\n",
+ " d = d[['sample', 'learning%', 'in_training']].sort_values(\"learning%\", ascending=False)\n",
+ " print(d.to_markdown())\n",
+ " display(d)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
"# TODO: compare big and small"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "big numbers mean a difference between big and small models\n",
+ "| sample | before | after | learning% |\n",
+ "|:----------------------------------------------|---------:|-----------:|------------:|\n",
+ "| sokal hoax | 3.7865 | 3.64774 | 0.138764 |\n",
+ "| good_ml | 1.94178 | 1.45213 | 0.489652 |\n",
+ "| Theory o. general relativity | 1.92992 | 1.221 | 0.708922 |\n",
+ "| email_to_fauci | 1.47915 | 1.11068 | 0.368471 |\n",
+ "| Schmidhuber 2023 Subjective Novelty, Surprise | 0.865227 | 0.701818 | 0.163409 |\n",
+ "| AI gen fake paper | 0.619803 | 0.611503 | 0.0083003 |\n",
+ "| I have a dream | 0.317971 | 0.212919 | 0.105052 |\n",
+ "| enron_email1 | 1.35998 | 0.161322 | 1.19866 |\n",
+ "| openai_board_ann | 0.548308 | 0.1559 | 0.392407 |\n",
+ "| wikipedia on LK-99 | 0.690145 | 0.140681 | 0.549463 |\n",
+ "| bad_ml | 0.405684 | 0.0895505 | 0.316133 |\n",
+ "| lorem ipsum | 0.120013 | -0.0161781 | 0.136191 |\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " before | \n",
+ " after | \n",
+ " learning% | \n",
+ "
\n",
+ " \n",
+ " | sample | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | sokal hoax | \n",
+ " 3.786502 | \n",
+ " 3.647738 | \n",
+ " 0.138764 | \n",
+ "
\n",
+ " \n",
+ " | good_ml | \n",
+ " 1.941784 | \n",
+ " 1.452132 | \n",
+ " 0.489652 | \n",
+ "
\n",
+ " \n",
+ " | Theory o. general relativity | \n",
+ " 1.929925 | \n",
+ " 1.221003 | \n",
+ " 0.708922 | \n",
+ "
\n",
+ " \n",
+ " | email_to_fauci | \n",
+ " 1.479151 | \n",
+ " 1.110680 | \n",
+ " 0.368471 | \n",
+ "
\n",
+ " \n",
+ " | Schmidhuber 2023 Subjective Novelty, Surprise | \n",
+ " 0.865227 | \n",
+ " 0.701818 | \n",
+ " 0.163409 | \n",
+ "
\n",
+ " \n",
+ " | AI gen fake paper | \n",
+ " 0.619803 | \n",
+ " 0.611503 | \n",
+ " 0.008300 | \n",
+ "
\n",
+ " \n",
+ " | I have a dream | \n",
+ " 0.317971 | \n",
+ " 0.212919 | \n",
+ " 0.105052 | \n",
+ "
\n",
+ " \n",
+ " | enron_email1 | \n",
+ " 1.359979 | \n",
+ " 0.161322 | \n",
+ " 1.198657 | \n",
+ "
\n",
+ " \n",
+ " | openai_board_ann | \n",
+ " 0.548308 | \n",
+ " 0.155900 | \n",
+ " 0.392407 | \n",
+ "
\n",
+ " \n",
+ " | wikipedia on LK-99 | \n",
+ " 0.690145 | \n",
+ " 0.140681 | \n",
+ " 0.549463 | \n",
+ "
\n",
+ " \n",
+ " | bad_ml | \n",
+ " 0.405684 | \n",
+ " 0.089550 | \n",
+ " 0.316133 | \n",
+ "
\n",
+ " \n",
+ " | lorem ipsum | \n",
+ " 0.120013 | \n",
+ " -0.016178 | \n",
+ " 0.136191 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " before after learning%\n",
+ "sample \n",
+ "sokal hoax 3.786502 3.647738 0.138764\n",
+ "good_ml 1.941784 1.452132 0.489652\n",
+ "Theory o. general relativity 1.929925 1.221003 0.708922\n",
+ "email_to_fauci 1.479151 1.110680 0.368471\n",
+ "Schmidhuber 2023 Subjective Novelty, Surprise 0.865227 0.701818 0.163409\n",
+ "AI gen fake paper 0.619803 0.611503 0.008300\n",
+ "I have a dream 0.317971 0.212919 0.105052\n",
+ "enron_email1 1.359979 0.161322 1.198657\n",
+ "openai_board_ann 0.548308 0.155900 0.392407\n",
+ "wikipedia on LK-99 0.690145 0.140681 0.549463\n",
+ "bad_ml 0.405684 0.089550 0.316133\n",
+ "lorem ipsum 0.120013 -0.016178 0.136191"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a = df[df.model==\"TheBloke/Llama-2-7B-GPTQ\"].set_index('sample').drop(columns=['model', 'in_training'])\n",
+ "b = df[df.model==\"TheBloke/Llama-2-13B-GPTQ\"].set_index('sample').drop(columns=['model', 'in_training'])\n",
+ "d = (a-b).sort_values(\"after\", ascending=False)\n",
+ "print('big numbers (for after and learning) mean the smaller model was more confused')\n",
+ "print(d.to_markdown())\n",
+ "d\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/nbs/02_use_lora.ipynb b/nbs/02_use_lora.ipynb
index f814dd3..2f64a46 100644
--- a/nbs/02_use_lora.ipynb
+++ b/nbs/02_use_lora.ipynb
@@ -30,6 +30,7 @@
"from datasets import load_dataset\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
"import numpy as np\n",
+ "import pandas as pd\n",
"from peft import LoraConfig, get_peft_model, IA3Config"
]
},
@@ -71,8 +72,8 @@
"import json\n",
"samples = json.load(open(\"../samples.json\"))\n",
"\n",
- "sample = samples[0]\n",
- "sample"
+ "# sample = samples[0]\n",
+ "# sample"
]
},
{
@@ -108,7 +109,7 @@
" device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"\n",
" # model = AutoModelForCausalLM.from_pretrained(model_id)\n",
- " # model = model.to(device)\n",
+ " model = model.to(device)\n",
"\n",
" # tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"\n",
@@ -201,8 +202,8 @@
"metadata": {},
"outputs": [],
"source": [
- "results = perplexity_compute(data=sample['text'], model=model, tokenizer=tokenizer, device='cuda')\n",
- "results['mean_perplexity']"
+ "# results = perplexity_compute(data=sample['text'], model=model, tokenizer=tokenizer, device='cuda')\n",
+ "# results['mean_perplexity']"
]
},
{
@@ -243,19 +244,31 @@
"model.lm_head = CastOutputToFloat(model.lm_head)\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "peft_config = IA3Config(\n",
- " target_modules=[ \"fc2\", \"Wqkv\",], \n",
- " feedforward_modules=[\"fc2\"],\n",
- " inference_mode=False,\n",
- ")\n",
- "model = get_peft_model(model, peft_config)\n",
- "model.config.use_cache = False"
+ "# # Verifying the datatypes.\n",
+ "# dtypes = {}\n",
+ "# for _, p in model.named_parameters():\n",
+ "# dtype = p.dtype\n",
+ "# if dtype not in dtypes:\n",
+ "# dtypes[dtype] = 0\n",
+ "# dtypes[dtype] += p.numel()\n",
+ "# total = 0\n",
+ "# for k, v in dtypes.items():\n",
+ "# total += v\n",
+ "# for k, v in dtypes.items():\n",
+ "# print(k, v, v / total)"
]
},
{
@@ -264,19 +277,7 @@
"metadata": {},
"outputs": [],
"source": [
- "\n",
- "# Verifying the datatypes.\n",
- "dtypes = {}\n",
- "for _, p in model.named_parameters():\n",
- " dtype = p.dtype\n",
- " if dtype not in dtypes:\n",
- " dtypes[dtype] = 0\n",
- " dtypes[dtype] += p.numel()\n",
- "total = 0\n",
- "for k, v in dtypes.items():\n",
- " total += v\n",
- "for k, v in dtypes.items():\n",
- " print(k, v, v / total)\n"
+ "# sample['text']"
]
},
{
@@ -286,12 +287,12 @@
"outputs": [],
"source": [
"\"\"\"### Training\"\"\"\n",
- "from datasets import Dataset\n",
+ "# from datasets import Dataset\n",
"\n",
"# data = load_dataset(\"Abirate/english_quotes\")\n",
- "data = Dataset.from_dict({\"text\": [sample['text'][:len(sample['text'])//2]]*100})\n",
- "data = data.map(lambda samples: tokenizer(samples[\"text\"]), batched=True).with_format(\"torch\")\n",
- "data"
+ "# data = Dataset.from_dict({\"text\": [sample['text'][:len(sample['text'])//2]]*100})\n",
+ "# data = data.map(lambda samples: tokenizer(samples[\"text\"]), batched=True).with_format(\"torch\")\n",
+ "# data"
]
},
{
@@ -300,8 +301,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from torch.utils.data import DataLoader\n",
- "# batch.keys()"
+ "from torch.nn import functional as F"
]
},
{
@@ -310,17 +310,74 @@
"metadata": {},
"outputs": [],
"source": [
- "optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)\n",
- "model.train()\n",
- "model = model.to('cuda')\n",
- "for epoch in range(10):\n",
- " for batch in DataLoader(data, batch_size=1):\n",
- " b_in = {'input_ids': batch['input_ids'].to('cuda').to(dtype), 'attention_mask': batch['attention_mask'].to('cuda').to(dtype)}\n",
- " optimizer.zero_grad()\n",
- " loss = model(**batch).loss\n",
- " loss.backward()\n",
- " optimizer.step()\n",
- " print(loss.item())"
+ "def lora_eval(model, sample):\n",
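+    "    # attach a new IA3 adapter. NOTE(review): get_peft_model runs on every call with the same model - confirm adapters from earlier samples are not stacked\n",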
+ " peft_config = IA3Config(\n",
+ " target_modules=[ \"fc2\", \"Wqkv\",], \n",
+ " feedforward_modules=[\"fc2\"],\n",
+ " inference_mode=False,\n",
+ " )\n",
+ " model = get_peft_model(model, peft_config)\n",
+ " model.config.use_cache = False\n",
+ "\n",
+    "    # train adapter on the first half of the text: one optimizer step per prefix, predicting the next token\n",
+ " s = sample['text']\n",
+ " first_half = s[:len(s)//2]\n",
+ " second_half = s[len(s)//2:]\n",
+ " input_ids = tokenizer(first_half, return_tensors=\"pt\")[\"input_ids\"][0].to('cuda')\n",
+ " device = 'cuda'\n",
+ " optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)\n",
+ " model.train()\n",
+ " model = model.to(device)\n",
+ " for epoch in range(1):\n",
+ " for i in range(1, len(input_ids)):\n",
+ " X = input_ids[:i][None, ]\n",
+ " targets = input_ids[i:i+1][None, ]\n",
+ " optimizer.zero_grad()\n",
+ " out = model(input_ids=X, \n",
+ " )\n",
+ " logits = out['logits'][:, -1]\n",
+ " loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))\n",
+ " loss.backward()\n",
+ " optimizer.step()\n",
+ " # print(loss.item())\n",
+ "\n",
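+    "    # eval: perplexity of the second half; before = inside disable_adapter(), after = second call (presumably with the adapter active)\n",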
+ " model.eval();\n",
+ " with torch.no_grad():\n",
+ " with model.disable_adapter():\n",
+ " results = perplexity_compute(data=second_half, model=model, tokenizer=tokenizer, device='cuda')\n",
+ " results['mean_perplexity']\n",
+ " results2 = perplexity_compute(data=second_half, model=model, tokenizer=tokenizer, device='cuda')\n",
+ "\n",
+ " return dict(before=results['mean_perplexity'], after=results2['mean_perplexity'])\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = []\n",
+ "for sample in samples:\n",
+ " r = lora_eval(model, sample)\n",
+ " r.update(sample)\n",
+ " data.append(r)\n",
+ " 1/0\n",
+ " print(data[-1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print('perplexity (on 2nd half) before and after training adapter on first half of text')\n",
+ "df = pd.DataFrame(data)\n",
+ "df"
]
},
{
@@ -335,10 +392,21 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "results2 = perplexity_compute(data=sample['text'], model=model, tokenizer=tokenizer, device='cuda')\n",
- "results['mean_perplexity'], results2['mean_perplexity']"
- ]
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
},
{
"cell_type": "code",