Files
GENIES/nbs/01_mjc_convert_data_to_preference.ipynb
T
wassname 720d0ce333 misc
2024-08-25 15:06:10 +08:00

4012 lines
134 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Convert `genies` datasets to [open_pref_eval](https://github.com/wassname/open_pref_eval)\n",
"\n",
"\n",
"Here I'm taking the GENIES datasets and:\n",
"1. converting them to a preference format (compatible with open_pref_eval)\n",
"2. hosting them on Hugging Face"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Setup\n",
"\n",
"```sh\n",
"python -m venv .venv --prompt GENIES\n",
". .venv/bin/activate\n",
"pip install wheel fire requests\n",
"pip install -r requirements.txt\n",
"python ./download_data.py\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from datasets import load_dataset\n",
"import datasets\n",
"\n",
"from pathlib import Path\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'source': 'alpaca_easy', 'target': 'alpaca_hard'},\n",
" {'source': 'arc_easy', 'target': 'arc_hard'},\n",
" {'source': 'math_easy', 'target': 'math_hard'},\n",
" {'source': 'code_easy', 'target': 'code_hard'},\n",
" {'source': 'ranking_logic_easy', 'target': 'ranking_logic_hard'},\n",
" {'source': 'raven_easy', 'target': 'raven_matrices'},\n",
" {'source': 'alpaca_mmlu', 'target': 'spanish_input'},\n",
" {'source': 'alpaca_mmlu', 'target': 'spanish_output'},\n",
" {'source': 'alpaca_mmlu', 'target': 'comma_separated_input'},\n",
" {'source': 'alpaca_mmlu', 'target': 'comma_separated_output'},\n",
" {'source': 'alpaca_mmlu', 'target': 'ranking_logic'},\n",
" {'source': 'alpaca_mmlu', 'target': 'raven_matrices'},\n",
" {'source': 'alpaca_mmlu', 'target': 'word_swap'},\n",
" {'source': 'code', 'target': 'counterfactual_python'},\n",
" {'source': 'code', 'target': 'us_history'},\n",
" {'source': 'code', 'target': 'change_my_view'},\n",
" {'source': 'cooking', 'target': 'math'},\n",
" {'source': 'cooking', 'target': 'raven_matrices'},\n",
" {'source': 'math', 'target': 'change_my_view'},\n",
" {'source': 'math', 'target': 'cooking'},\n",
" {'source': 'change_my_view', 'target': 'raven_matrices'},\n",
" {'source': 'change_my_view', 'target': 'cooking'},\n",
" {'source': 'raven_matrices', 'target': 'us_history'},\n",
" {'source': 'raven_matrices', 'target': 'code'},\n",
" {'source': 'us_history', 'target': 'math'},\n",
" {'source': 'us_history', 'target': 'code'},\n",
" {'source': 'us_history', 'target': 'us_history_textbook'},\n",
" {'source': 'us_history_textbook', 'target': 'us_history_fiction'},\n",
" {'source': 'us_history_fiction', 'target': 'us_history_make_questions'},\n",
" {'source': 'us_history_make_questions', 'target': 'us_history'},\n",
" {'source': 'math', 'target': 'math_fiction'},\n",
" {'source': 'math_fiction', 'target': 'math_textbook'},\n",
" {'source': 'math_textbook', 'target': 'math_make_questions'},\n",
" {'source': 'math_make_questions', 'target': 'math'},\n",
" {'source': 'alpaca_low_quality', 'target': 'alpaca_high_quality'},\n",
" {'source': 'shp_low_quality', 'target': 'shp_high_quality'},\n",
" {'source': 'code_low_quality', 'target': 'code'},\n",
" {'source': 'alpaca_mmlu', 'target': 'truthful_qa'},\n",
" {'source': 'alpaca_mmlu', 'target': 'personality_traits'},\n",
" {'source': 'alpaca_mmlu', 'target': 'survival_influence'},\n",
" {'source': 'alpaca_mmlu', 'target': 'gender_bias'},\n",
" {'source': 'alpaca_mmlu', 'target': 'punishment_avoidance'},\n",
" {'source': 'alpaca_mmlu', 'target': 'reward_seeking'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_1'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_2'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_3'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_mimicry',\n",
" 'target_reference': 'quote_attribution'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_answer',\n",
" 'target_reference': 'arc_easy'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_feedback',\n",
" 'target_reference': 'code_is_correct'},\n",
" {'source': 'alpaca_chat',\n",
" 'target': 'sycophancy_are_you_sure',\n",
" 'target_reference': 'arc_easy'},\n",
" {'source': 'pursue_goals', 'target': 'relinquish_power'},\n",
" {'source': 'creative_writing', 'target': 'biology_with_literary_style'},\n",
" {'source': 'alpaca_short',\n",
" 'target': 'alpaca_long',\n",
" 'target_reference': 'alpaca_mmlu'},\n",
" {'source': 'alpaca_chat', 'target': 'illegal_dont_help'},\n",
" {'source': 'alpaca_mmlu', 'target': 'wrong_arc'},\n",
" {'source': 'alpaca_mmlu', 'target': 'unhelpful_alpaca'},\n",
" {'source': 'alpaca_mmlu', 'target': 'truthful_qa'},\n",
" {'source': 'alpaca_mmlu', 'target': 'personality_traits'},\n",
" {'source': 'alpaca_mmlu', 'target': 'gender_bias'},\n",
" {'source': 'alpaca_mmlu', 'target': 'survival_influence'},\n",
" {'source': 'alpaca_mmlu', 'target': 'punishment_avoidance'},\n",
" {'source': 'alpaca_mmlu', 'target': 'reward_seeking'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_1'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_2'},\n",
" {'source': 'alpaca_mmlu', 'target': 'crt_3'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_mimicry',\n",
" 'target_reference': 'quote_attribution'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_answer',\n",
" 'target_reference': 'arc_easy'},\n",
" {'source': 'alpaca_mmlu',\n",
" 'target': 'sycophancy_feedback',\n",
" 'target_reference': 'code_is_correct'},\n",
" {'source': 'alpaca_chat',\n",
" 'target': 'sycophancy_are_you_sure',\n",
" 'target_reference': 'arc_easy'}]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List of distribution-shift pairs; entries carry 'source' and 'target' keys.\n",
"path_to_distribution_shift_pairs = Path('../distribution_shifts/all.json')\n",
"# Use a context manager so the file handle is closed instead of leaked.\n",
"with open(path_to_distribution_shift_pairs) as f:\n",
"    pairs_data = json.load(f)\n",
"pairs_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from datasets import DatasetInfo, Dataset\n",
"\n",
"def genie2ds(train: list) -> pd.DataFrame:\n",
"    \"\"\"Convert rows in the GENIE format into a DataFrame in preference format.\n",
"\n",
"    For each row, ``row['responses']`` is wrapped in a ``pd.Series``; index\n",
"    entries whose value equals 1 are treated as chosen and those whose value\n",
"    equals 0 as rejected (presumably a mapping of response text -> 0/1 label;\n",
"    verify against the GENIE data files).\n",
"\n",
"    Args:\n",
"        train: list of rows, each providing ``'prompt'`` and ``'responses'``.\n",
"\n",
"    Returns:\n",
"        DataFrame with columns ``prompt``, ``chosen``, ``rejected`` and ``i``\n",
"        (position of the source row in ``train``). One record is emitted per\n",
"        rejected response, paired with the first chosen response of that row.\n",
"        An empty input yields an empty frame that still has these columns.\n",
"\n",
"    Raises:\n",
"        IndexError: if a row has no response labelled 1.\n",
"    \"\"\"\n",
"    columns = ['prompt', 'chosen', 'rejected', 'i']\n",
"    outs = []\n",
"    for i, row in enumerate(train):\n",
"        labels = pd.Series(row['responses'])\n",
"        chosen_candidates = labels[labels == 1].index\n",
"        if len(chosen_candidates) == 0:\n",
"            # Same exception type as a bare `.index[0]` lookup, but says which row is bad.\n",
"            raise IndexError(f'row {i} has no response labelled 1 (chosen)')\n",
"        chosen = chosen_candidates[0]\n",
"        outs.extend(\n",
"            dict(prompt=row['prompt'], chosen=chosen, rejected=r, i=i)\n",
"            for r in labels[labels == 0].index\n",
"        )\n",
"\n",
"    # Passing `columns` keeps the schema stable even when `outs` is empty.\n",
"    return pd.DataFrame(outs, columns=columns)\n",
"\n",
"\n",
"\n",
"def json2ds(source_dir: Path) -> datasets.DatasetDict:\n",
"    \"\"\"Load one GENIE distribution directory as a preference-format ``DatasetDict``.\n",
"\n",
"    Reads ``test.json``, ``train.json`` and ``metadata.json`` from ``source_dir``,\n",
"    converts both splits with ``genie2ds`` and attaches the same ``DatasetInfo``\n",
"    (description and config name derived from ``metadata['id']``) to each split.\n",
"\n",
"    Args:\n",
"        source_dir: directory containing the three JSON files.\n",
"\n",
"    Returns:\n",
"        ``datasets.DatasetDict`` with ``'train'`` and ``'test'`` splits.\n",
"    \"\"\"\n",
"    def _read_json(path: Path):\n",
"        # Context manager closes the handle; a bare `json.load(open(...))` leaks it.\n",
"        with open(path) as f:\n",
"            return json.load(f)\n",
"\n",
"    test = _read_json(source_dir / 'test.json')\n",
"    train = _read_json(source_dir / 'train.json')\n",
"    metadata = _read_json(source_dir / 'metadata.json')\n",
"    ds_info = DatasetInfo(\n",
"        description= f\"GENIE:{metadata['id']}\",\n",
"        citation= \"\"\"@misc{clymer2023generalizationanalogiestestbedgeneralizing,\n",
"      title={Generalization Analogies: A Testbed for Generalizing AI Oversight to Hard-To-Measure Domains}, \n",
"      author={Joshua Clymer and Garrett Baker and Rohan Subramani and Sam Wang},\n",
"      year={2023},\n",
"      eprint={2311.07723},\n",
"      archivePrefix={arXiv},\n",
"      primaryClass={cs.AI},\n",
"      url={https://arxiv.org/abs/2311.07723}, \n",
"    }\"\"\",\n",
"        homepage= \"https://joshuaclymer.github.io/generalization-analogies-website/\",\n",
"        license= \"MIT\",\n",
"        config_name=f\"{metadata['id']}\",\n",
"    )\n",
"\n",
"\n",
"    df_train = genie2ds(train)\n",
"    df_test = genie2ds(test)\n",
"    # Both splits share one DatasetInfo so the hub card metadata is identical.\n",
"    ds_dict = datasets.DatasetDict(\n",
"        {'train': datasets.Dataset.from_pandas(df_train, info=ds_info),\n",
"        'test': datasets.Dataset.from_pandas(df_test, info=ds_info)}\n",
"    )\n",
"    return ds_dict"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_easy already exists, skipping\n",
"Dataset alpaca_hard already exists, skipping\n",
"Dataset arc_easy already exists, skipping\n",
"Dataset arc_hard already exists, skipping\n",
"Dataset math_easy already exists, skipping\n",
"Dataset math_hard already exists, skipping\n",
"Dataset code_easy already exists, skipping\n",
"Dataset code_hard already exists, skipping\n",
"Dataset ranking_logic_easy already exists, skipping\n",
"Dataset ranking_logic_hard already exists, skipping\n",
"Dataset raven_easy already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset spanish_input already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset spanish_output already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset comma_separated_input already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset comma_separated_output already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset ranking_logic already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"Dataset word_swap already exists, skipping\n",
"Dataset code already exists, skipping\n",
"Dataset counterfactual_python already exists, skipping\n",
"Dataset code already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"Dataset code already exists, skipping\n",
"Dataset change_my_view already exists, skipping\n",
"Dataset cooking already exists, skipping\n",
"Dataset math already exists, skipping\n",
"Dataset cooking already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset math already exists, skipping\n",
"Dataset change_my_view already exists, skipping\n",
"Dataset math already exists, skipping\n",
"Dataset cooking already exists, skipping\n",
"Dataset change_my_view already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset change_my_view already exists, skipping\n",
"Dataset cooking already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"Dataset raven_matrices already exists, skipping\n",
"Dataset code already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"Dataset math already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"Dataset code already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"BuilderConfig 'us_history_textbook' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'word_swap']\n",
"Dataset us_history_textbook does not exist, creating\n",
"../distributions/us_history_textbook us_history_textbook DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "64f8ed4a4d7e489ea215ddcfeeca8733",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d364a6417df0409abdb3e8f78941f423",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee74356695ae42f892da767bc8ebbe07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b5448f84cfd2412b9eb26feae057c33b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b7648bac06d549db96c9861e6adf1605",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/13.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5b6c833232454de58e805591cb97b014",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "87a54cd53d714b32b7107c606fd57834",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/443k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f74420b688c745b2958d83a514a010c3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/181k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eac9cfba8c564913a0f88175376d34ca",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "309fe3c69de3447da2111222a7173b60",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1800 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "23e403079bd74c8e8e800a4b45c3f4a2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset us_history_textbook already exists, skipping\n",
"BuilderConfig 'us_history_fiction' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_textbook', 'word_swap']\n",
"Dataset us_history_fiction does not exist, creating\n",
"../distributions/us_history_fiction us_history_fiction DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1755\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7c1e77df103b4c0c8db81692ea3c7ca4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d85849a4f6c4b17b6a1f0bfa3fd7096",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "558233fde4bf41bbba4cc8d77450f0e5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ec8903518dca46f0b532a38bdeb785f1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7cc0316a5d354ee3ae96ce28d535170f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/13.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "144069ffe9414368b0a23c7aef74c2cd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/13.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fa22e7ee1461473db1b113cb36562105",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9082134250ae4e828e166e360d300193",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/494k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2c143346b223411192bffd7ebba26799",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/207k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c22fbee6efb447dfb2314ca8a93c3bb8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5c3a0e7c125d444984b1b2403e2766c0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1755 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e2456377bf33436586d757e320b65afc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset us_history_fiction already exists, skipping\n",
"BuilderConfig 'us_history_make_questions' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_textbook', 'word_swap']\n",
"Dataset us_history_make_questions does not exist, creating\n",
"../distributions/us_history_make_questions us_history_make_questions DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f475d284f92045ee8d4e647c7da68327",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6cae739b22d94890b32c4fe78fb5ca51",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f99d8858e1fb401586cca4bca0ad6e4d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1b5fd5cc92d64de68a8d819c3f6d1aa9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "98ab0460afad474aa247fc10fff5f317",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/13.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2fde9050a08d4f1b9638bb8383769aea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/14.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "742a37f4a45b4fffad1aabcbf0abd890",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e45b0b7888fb4b0abb2472e7ae5e3884",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/285k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b6c681f8dbbe4073a0365f5654a6d12a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/120k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "33aa7e2936684d228f0ffb615cf36558",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4149139bc5e643aca365701f065779bb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1800 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cf00ecfb5a2841f197cf8e3f60d209f5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset us_history_make_questions already exists, skipping\n",
"Dataset us_history already exists, skipping\n",
"Dataset math already exists, skipping\n",
"BuilderConfig 'math_fiction' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset math_fiction does not exist, creating\n",
"../distributions/math_fiction math_fiction DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f9bd3b5ce7ed4942855cba161817f79a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4ce12f13ad1d48128e9a10a0e78f1182",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4cca50f8060a43dfaca6c98b1e92a4d8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "66f1df3d3cfa43958caf2df37fdbcfdb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "785277fd5ff94c10823a6057fdc1e029",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/14.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "72cdfcda9e894a7d8a52227989b802ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/14.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "77dd33a4637e42b3981d9289f95f17bd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "787b57d6c2434ee2b7da3e0927448402",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/357k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "863d6b67e67e4e6ab69a3c329062845d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/150k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eade88b461d2402195ed4cd5bacedcee",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6663af1bed1e437fb385e2e880d22945",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1800 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8bf567590acb412aa266c94f17ffae62",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset math_fiction already exists, skipping\n",
"BuilderConfig 'math_textbook' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset math_textbook does not exist, creating\n",
"../distributions/math_textbook math_textbook DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "52cbc1499e0e4691b1ab9d1e5e1fc048",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "03ddc85680c74cd99d4501240501bc89",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "07e9b6680dba4af6beb7d29267b58744",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "10b4bef712944e5dacaf8d81eae4fd62",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "950b1155d8264294bafa02951666dbe3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/14.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "113f5bd0da3a4cf3b9d5d9348a262417",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/15.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0ea35b9714194746bebd9b9f9c9c92aa",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a75bd15152b24616afad11fd5f7fb70d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/390k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8f3071f4d09e4e9c8e18977f5deaba95",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/160k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2a9beed337d643cda3b1929db189c06f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d9781ab17b549188c107c8ad6237539",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1800 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0850f19c6f144c6e97cf62a3f1a5568f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset math_textbook already exists, skipping\n",
"BuilderConfig 'math_make_questions' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset math_make_questions does not exist, creating\n",
"../distributions/math_make_questions math_make_questions DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1773\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c4700b8e0854f309b2d12e661c0166d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3368ba4166db46d1afbda5bdcee6112c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ce8efd6afca49d2be32d82f445e6841",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e78316f01b2945c28c32f5996a5bc00f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9f2a673da3764a3d942e071b8b275e93",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/15.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8d0876707a004891b155bd150f3669bd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/15.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "da01b935f1ad45089e71ef4180f38b73",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1331eb4b58ab4197ad45aa3d03418551",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/160k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "43885c3e847b420a9318d64a98b2c0eb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading data: 0%| | 0.00/72.2k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "455005f0ea874a699eb425235ec5b9ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Extracting data files: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3128b7ef7874e3bb1a9fd530b38d8ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating train split: 0%| | 0/1773 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "322e18f8d34e4372952950fe6d568ce7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Generating test split: 0%| | 0/750 [00:00<?, ? examples/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset math_make_questions already exists, skipping\n",
"Dataset math already exists, skipping\n",
"BuilderConfig 'alpaca_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset alpaca_low_quality does not exist, creating\n",
"../distributions/alpaca_low_quality alpaca_low_quality DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2409b96723c44dc09eb9044e6b946ac2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f5a51fd3259444c8b487f53061c3df82",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "54bb11f327ba4f1db64b36cd338317d0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7106631024be4494bc6559fe1d16e7af",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2f77a3995c4c4095bc20caccc43e15a6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/15.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "75e09b74f3d941b89ad4d503f26e012f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/16.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'alpaca_high_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset alpaca_high_quality does not exist, creating\n",
"../distributions/alpaca_high_quality alpaca_high_quality DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "57a1d35f2a48431eb1bef3e2c8a53266",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f936d68fe3584d4d94c3fd22b32ae0a7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "01db2c1ba9c14675b63c4f59a328ef02",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e315a9ee6524e29a160fdc7209411f5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca6d9d622bef4299a3f85b2da7339c06",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/16.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "935a0accb3404e4384b103c2d4df43ba",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/16.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'shp_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset shp_low_quality does not exist, creating\n",
"../distributions/shp_low_quality shp_low_quality DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7c684f29099347f78b1587c52f35cae6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "609ef330399c458683b828b5c90fbbce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a68ffe11dbaf426e83156f6b462113ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "83fe2a33f6984df5a7583e872057b90a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f45c86ef659e4a28839b021e15d13918",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/16.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bc3b6e1e885a49dd820f5d00d884c3c2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/17.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'shp_high_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset shp_high_quality does not exist, creating\n",
"../distributions/shp_high_quality shp_high_quality DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f17c3187ef484521bfcb11318c5cb295",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f61adb6431584cb8a9cd70c98c489917",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ffe55fc2be2f4b23802252fc540fcd2e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "58cea79219b34af395c181326de460b2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "208b2cc247664a2792fb5dbc9a76a945",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/17.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "282d2f0e4e5f46849a415468cbc1541f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/17.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'code_low_quality' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset code_low_quality does not exist, creating\n",
"../distributions/code_low_quality code_low_quality DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1200\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 500\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "007e1405af624f8ead47fd68ffe6baff",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee11ce0f8cf3441ab46400028a60000f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0f118b136ae844c4a77493f9a33b1939",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "37dc54e3658345bc9ab155cd0372c32f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fb31a0ee341b4b72bed9110dcf4aaf59",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/17.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "43a80adf6cb64965b043838c94942fb7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/18.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset code already exists, skipping\n",
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'truthful_qa' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset truthful_qa does not exist, creating\n",
"../distributions/truthful_qa truthful_qa DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1302\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e4f58817f88244aeb71796207ea8a027",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "39d95fd366e748168603496723156459",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "faaacd18bea14a62abc313ef2194c0d1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6c67c119be5a44679856925d7ee25195",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9cd86e4ea0b24d058dc098679cf2ea1d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/18.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "00d75e7c58304c9bba8ef446eba8d927",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/18.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'personality_traits' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset personality_traits does not exist, creating\n",
"../distributions/personality_traits personality_traits DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1200\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 500\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6583fe8b252244aab6a309c55774a0d1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "203ccd963c1f48059cdaa0dfb940c2f7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8cbf18862f2c46258ea1836005748a10",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e29800690ed427ab2acd429ccbdc197",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b3d5c1a5106049f7ac7dd5bc68c713dd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/18.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca52356c856048b8b09d31e72a0e50e9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/19.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'survival_influence' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset survival_influence does not exist, creating\n",
"../distributions/survival_influence survival_influence DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "149d5cba40c74a719b0fc92acf1f1bca",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d93622d4af9c4a2d8878abcda299924b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "743c1b26301d438f895b5d77c8eb9571",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cef52e24cc76488191f83311e4197a16",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ac5bba77de6446f3bcfb6aef060378a8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/19.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "881976fd65a645b1970133db459022e0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/19.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'gender_bias' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset gender_bias does not exist, creating\n",
"../distributions/gender_bias gender_bias DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1200\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 500\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a5b0f4e37ee84a5e8f23dac41fea52fd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c7b9158418e74e9494873f85ac26bff4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7aa4f70444d942a1a6c7bc1329ffd907",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0ec3121788f74862a056274bf1c33832",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2b80ba76cae54fb0a3d79d5c01d0f719",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/19.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a9e17377d4324921a29069182f354400",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/20.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'punishment_avoidance' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset punishment_avoidance does not exist, creating\n",
"../distributions/punishment_avoidance punishment_avoidance DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e8584729dd9949e984498ab5873868ac",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "392871e51e5f402995126425d9dcd6aa",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f37cdcf5d62e40ba9864a9b96560cff7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0372ccd14a7943b0ada3fe5303d5891b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b6bd28ac8499420c92d0b77bd2d7e67c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/20.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "88f6b0fd435042c9acd7499a835dd35d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/20.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'reward_seeking' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset reward_seeking does not exist, creating\n",
"../distributions/reward_seeking reward_seeking DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ecbf377a0404409a9c2fa73b4fd8d47c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "12bfd0679611453fabada02ee3c1ca47",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "90b49da2376a4b68815b2d6dd217047a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ec6336a30f5847f88950a2fea7777dc5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d4c496d421d842efba4bfbb3e3584cbc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/20.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "376f2b5e2b94445187d1b4e75783479e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/21.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'crt_1' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset crt_1 does not exist, creating\n",
"../distributions/crt_1 crt_1 DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "831bc5c00d934aa0a55eef982371437f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8938f8cdcd2a4b79b4c14660a5dbc9ad",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9ebfbc4ae92f4f3e970d22b2b43877b8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0a64793f4adb40438c1e8faa56a026da",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ffdbd505aeeb41f383810269d03b2d8f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/21.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f632c120c2e7420b9ddf062530dd5e8c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/21.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'crt_2' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset crt_2 does not exist, creating\n",
"../distributions/crt_2 crt_2 DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "bc6700b1f7e743f09462ba911cad20df",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "394b2b98b44b4de6914717b822680fa4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "75d75721b9044d76a858047e115ba023",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1d22f3c0203341769f2626c0bdf8d2e4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "acd2fa02b7884687bc49c0c156822cc8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/21.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ec36fbcd63e4482ca9388676aa090563",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/22.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'crt_3' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset crt_3 does not exist, creating\n",
"../distributions/crt_3 crt_3 DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5a20c32fa8934ecbb94ac9b5c9843817",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fa7164751d1441f593a5a933d8af966b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3fe557256d1b4636973f74b6d9384e9f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a6ea388d902e4b0a8cc46a7c0cae069b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f0096d60a8dd47688d0006ceace8b007",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/22.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5e7848f61d294c06bb51e4e1c8f9e6bb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/22.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'sycophancy_mimicry' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset sycophancy_mimicry does not exist, creating\n",
"../distributions/sycophancy_mimicry sycophancy_mimicry DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d019cec01ff0411d9a17cc1f78a44083",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1f922df53bb14ef2b4ee5aab44dd963b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "19cbf89d1e63406892b6157f0ddd0d30",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "84b1334b945c466ab86b36f9e5ac3f96",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2cd0b33d5bc94009858412abe4bcf191",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/22.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4b39e7d4741742dab42071b97ba47b07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/23.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'sycophancy_answer' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset sycophancy_answer does not exist, creating\n",
"../distributions/sycophancy_answer sycophancy_answer DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a51528cb2a444bc6ba40ee4133256965",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d65e3c8fdb8f47328ba92247dce89c07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7dbc55c6e0574c66a14cda4f08efd1ee",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e5715f255e714da986b34b0333260192",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b776bb69b68649e095638a79812ec904",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/23.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "758742e2c6cd42ceb57484b09c9177f5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/23.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset alpaca_mmlu already exists, skipping\n",
"BuilderConfig 'sycophancy_feedback' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset sycophancy_feedback does not exist, creating\n",
"../distributions/sycophancy_feedback sycophancy_feedback DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7eb845a1b4c9415c9f37ac3565fff55c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f2a3a35411414f7e94d9d9e261f6a014",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8f9d99aa3f954799b175484b012f9036",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0ccca1566cd54e9da1ea7de379ef904d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7f390e2b922342a8936c7374fc3fbade",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/23.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ac6fdc2035bf4137b4e54ea5b8639ca5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/24.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'alpaca_chat' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset alpaca_chat does not exist, creating\n",
"../distributions/alpaca_chat alpaca_chat DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d3657cb12a014fcfb7e27d885b31c5b9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "df312216a4ed47dd9ba2ff4dafe58641",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "32930f9597704c3fb722b0aed0458379",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1237066883ca48cdb888e6a6fe9c3b35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d16226d64c5f4ae3bc704e75f09b1e13",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/24.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9c53819aa64d40b6bd77634af848b7ea",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/24.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'sycophancy_are_you_sure' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset sycophancy_are_you_sure does not exist, creating\n",
"../distributions/sycophancy_are_you_sure sycophancy_are_you_sure DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cc39ff0888f5483badfaa215a1e27500",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "056c0f093fb2416585aa15c0c10c1cd6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e0713c9077884525ac8317f6861e4707",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3f8801533b9a4b21b78dae5c158ae42b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5626c965bc3c47a5a825abb68bb35e1c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/24.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e54e6dd8815e47c4ba64be73281116e3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/25.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'pursue_goals' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset pursue_goals does not exist, creating\n",
"../distributions/pursue_goals pursue_goals DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a6060b344e6e4457a0f6afd9fb1dc8ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "35131258c8a941138759282afa1757f2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "24cc96dceac8467c9d37fe57dc3e20c0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e8a449b167584611a828741db0547a77",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "196b9fc3625947e2ae3f953698a6b1a4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/25.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a1364eb05ab04f40a4d63eac885e20cc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/25.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'relinquish_power' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset relinquish_power does not exist, creating\n",
"../distributions/relinquish_power relinquish_power DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "19952fefa57e40cabea52438a26010a0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "63a6e0b69fae4137b7f34da22ff4d67c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b80dfe55a3d24e21affa9f8e7337d031",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "326e5afb8ad34ad382bfd616256acf0c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d30844456008421f87014627b9d70ce8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/25.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "58e20c49d5ef42efb1afdc9716f636f3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/26.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'creative_writing' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset creative_writing does not exist, creating\n",
"../distributions/creative_writing creative_writing DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1200\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 500\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2a4d09db8e21494aa181faf60cbe9533",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6cc92b72780b4d8a888aa580a4aa3bbf",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c68ef7b058a4f2796ad267e1e8c12f9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2110d0c3780e4d1c915bc541ce625501",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6220ed16cf794a80b9f34d7a62f6b5b0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/26.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4967256f3d414e6a9cbf65b37887dbb2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/26.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'biology_with_literary_style' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset biology_with_literary_style does not exist, creating\n",
"../distributions/biology_with_literary_style biology_with_literary_style DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 1800\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 750\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c1b1674e403f4e78af6a2bd732c36a35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4ddd7ed1d46548ad993b52e3db4c7047",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/2 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "81f3b65e9f504d988d62871d49cf3249",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9d6035ab91cf4fd197b3e6a817b3c7e9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7cc275725608440399701e5d76981115",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/26.5k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b997ebdaba1b4f0dad7579947dbe3e35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/27.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'alpaca_short' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'biology_with_literary_style', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset alpaca_short does not exist, creating\n",
"../distributions/alpaca_short alpaca_short DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e90ac05495d8432a8bdf979fed560563",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fac373e1eb6c4056b5ae6942cdbd9481",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0d12b77429ba4144a5cd05a69c6e7a54",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c38b919038c64b6589c506c345606a04",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7f9aace5c3c1429f922eb9be86af73f0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/27.1k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "11888e352454492d863d485b682bb589",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/27.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"BuilderConfig 'alpaca_long' not found. Available: ['alpaca_chat', 'alpaca_easy', 'alpaca_hard', 'alpaca_high_quality', 'alpaca_low_quality', 'alpaca_mmlu', 'alpaca_short', 'arc_easy', 'arc_hard', 'biology_with_literary_style', 'change_my_view', 'code', 'code_easy', 'code_hard', 'code_low_quality', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'creative_writing', 'crt_1', 'crt_2', 'crt_3', 'gender_bias', 'math', 'math_easy', 'math_fiction', 'math_hard', 'math_make_questions', 'math_textbook', 'personality_traits', 'punishment_avoidance', 'pursue_goals', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'relinquish_power', 'reward_seeking', 'shp_high_quality', 'shp_low_quality', 'spanish_input', 'spanish_output', 'survival_influence', 'sycophancy_answer', 'sycophancy_are_you_sure', 'sycophancy_feedback', 'sycophancy_mimicry', 'truthful_qa', 'us_history', 'us_history_fiction', 'us_history_make_questions', 'us_history_textbook', 'word_swap']\n",
"Dataset alpaca_long does not exist, creating\n",
"../distributions/alpaca_long alpaca_long DatasetDict({\n",
" train: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 600\n",
" })\n",
" test: Dataset({\n",
" features: ['prompt', 'chosen', 'rejected', 'i'],\n",
" num_rows: 250\n",
" })\n",
"})\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "09622b9a0fe84003a43612cba0d7baa3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "81b20455ae1946c1854a76cf83d2052f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "70a205e4c58e406a8b263957f818d9c1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d8ce769ae7f1423d8ee3432520da3a63",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Creating parquet from Arrow format: 0%| | 0/1 [00:00<?, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4b60afdf4e644647aee80d56c21433bb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading metadata: 0%| | 0.00/27.6k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "318beee2f0fd4e1a86f6b8e8c62bc6a7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading readme: 0%| | 0.00/28.0k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Push every GENIES distribution referenced by `pairs_data` to the Hub as one\n",
"# config of the preference dataset, skipping configs that are already uploaded.\n",
"dist_dir = Path('../distributions')\n",
"repo_id = 'wassname/genie_dpo'\n",
"\n",
"# Several pairs share a distribution (the same source with different targets),\n",
"# so remember which directories were handled to avoid repeated Hub round-trips.\n",
"seen_dirs = set()\n",
"\n",
"for pair in pairs_data:\n",
"    for key in ['source', 'target']:\n",
"        source_dir = dist_dir / pair[key]\n",
"        if source_dir in seen_dirs:\n",
"            continue\n",
"        seen_dirs.add(source_dir)\n",
"\n",
"        # `with` closes the metadata file promptly instead of leaking the handle\n",
"        with open(source_dir / 'metadata.json') as f:\n",
"            metadata = json.load(f)\n",
"        config_name = metadata['id']\n",
"\n",
"        # Probe the Hub first: loading a config that is missing raises ValueError\n",
"        # (\"BuilderConfig ... not found\").\n",
"        try:\n",
"            load_dataset(repo_id, name=config_name, keep_in_memory=False, split='train[:1]')\n",
"        except ValueError as e:\n",
"            print(e)\n",
"            config_exists = False\n",
"        else:\n",
"            config_exists = True\n",
"\n",
"        if config_exists:\n",
"            print(f\"Dataset {config_name} already exists, skipping\")\n",
"            continue\n",
"\n",
"        # Convert and upload outside the `except` block so that a failed push is\n",
"        # not reported as chained to the probe's ValueError.\n",
"        print(f\"Dataset {config_name} does not exist, creating\")\n",
"        dataset2 = json2ds(source_dir)\n",
"        print(source_dir, config_name, dataset2)\n",
"        dataset2.push_to_hub(repo_id, config_name=config_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}