diff --git a/.gitignore b/.gitignore index 1a5c7ea..2ea2b5b 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ __pycache__/ # jypter notebook .ipynb_checkpoints/ +PKG-INFO diff --git a/nbs/01_mjc_convert_data_to_preference.ipynb b/nbs/01_mjc_convert_data_to_preference.ipynb index cc807ce..23f2f91 100644 --- a/nbs/01_mjc_convert_data_to_preference.ipynb +++ b/nbs/01_mjc_convert_data_to_preference.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -163,25 +163,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'id': 'alpaca_easy', 'external_datasets': [], 'overlapping_datasets': []}" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -233,20 +222,84 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "../distributions/alpaca_easy alpaca_easy\n" + "Dataset alpaca_easy already exists, skipping\n", + "Dataset alpaca_hard already exists, skipping\n", + "Dataset arc_easy already exists, skipping\n", + "Dataset arc_hard already exists, skipping\n", + "Dataset math_easy already exists, skipping\n", + "Dataset math_hard already exists, skipping\n", + "Dataset code_easy already exists, skipping\n", + "Dataset code_hard already exists, skipping\n", + "Dataset ranking_logic_easy already exists, skipping\n", + "Dataset ranking_logic_hard already exists, skipping\n", + "Dataset raven_easy already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset spanish_input already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset spanish_output already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset comma_separated_input already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset comma_separated_output already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset ranking_logic already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset alpaca_mmlu already exists, skipping\n", + "Dataset word_swap already exists, skipping\n", + "Dataset code already exists, skipping\n", + "Dataset counterfactual_python already exists, skipping\n", + "Dataset code already exists, skipping\n", + "Dataset us_history already exists, skipping\n", + "Dataset code already exists, skipping\n", + "Dataset change_my_view already exists, skipping\n", + "Dataset cooking already exists, skipping\n", + "Dataset math already exists, skipping\n", + "Dataset cooking already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset math already exists, skipping\n", + "Dataset change_my_view already exists, skipping\n", + "Dataset math already exists, skipping\n", + "Dataset cooking already exists, skipping\n", + "Dataset change_my_view already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset change_my_view already exists, skipping\n", + "Dataset cooking already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset us_history already exists, skipping\n", + "Dataset raven_matrices already exists, skipping\n", + "Dataset code already exists, skipping\n", + "Dataset us_history already exists, skipping\n", + "Dataset math already exists, skipping\n", + "Dataset us_history already exists, skipping\n", + "Dataset code already exists, skipping\n", + "Dataset us_history already exists, skipping\n", + "BuilderConfig 'us_history_textbook' not found. Available: ['alpaca_easy', 'alpaca_hard', 'alpaca_mmlu', 'arc_easy', 'arc_hard', 'change_my_view', 'code', 'code_easy', 'code_hard', 'comma_separated_input', 'comma_separated_output', 'cooking', 'counterfactual_python', 'math', 'math_easy', 'math_hard', 'ranking_logic', 'ranking_logic_easy', 'ranking_logic_hard', 'raven_easy', 'raven_matrices', 'spanish_input', 'spanish_output', 'us_history', 'word_swap']\n", + "Dataset us_history_textbook does not exist, creating\n", + "../distributions/us_history_textbook us_history_textbook DatasetDict({\n", + " train: Dataset({\n", + " features: ['prompt', 'chosen', 'rejected', 'i'],\n", + " num_rows: 1800\n", + " })\n", + " test: Dataset({\n", + " features: ['prompt', 'chosen', 'rejected', 'i'],\n", + " num_rows: 750\n", + " })\n", + "})\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "70cf450126b54157bab78571208e91d4", + "model_id": "64f8ed4a4d7e489ea215ddcfeeca8733", "version_major": 2, "version_minor": 0 }, @@ -260,56 +313,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4f8e24cda88f4fd88a44af913f193d89", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Pushing dataset shards to the dataset hub: 0%| | 0/1 [00:00