This commit is contained in:
wassname
2025-07-26 12:26:32 +08:00
parent 3950695c70
commit bdaa175759
8 changed files with 3866 additions and 387 deletions
+249 -227
View File
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -39,7 +39,7 @@
"1"
]
},
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -83,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -96,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -105,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -150,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -170,7 +170,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -202,7 +202,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -221,7 +221,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -246,7 +246,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -268,7 +268,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -290,7 +290,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -302,7 +302,7 @@
"})"
]
},
"execution_count": 14,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -345,7 +345,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -427,18 +427,19 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['before', 'after', 'hist', 'train/before', 'train/after', 'title', 'f', 'content', 'url', 'novelty', 'date', 'in_training'])"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
"ename": "NameError",
"evalue": "name 'data' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdata\u001b[49m[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mkeys()\n",
"\u001b[0;31mNameError\u001b[0m: name 'data' is not defined"
]
}
],
"source": [
@@ -1523,7 +1524,7 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 93,
"metadata": {},
"outputs": [
{
@@ -1548,305 +1549,305 @@
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>f</th>\n",
" <th>train/diff_sum</th>\n",
" <th>train/diff%_sum</th>\n",
" <th>novelty</th>\n",
" <th>in_training</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>../samples/2025_lw_review-planecrash.md</td>\n",
" <td>-339.481621</td>\n",
" <td>0.933950</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>../samples/2025_lw_2024-in-ai-predictions.md</td>\n",
" <td>-297.344904</td>\n",
" <td>0.789190</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>../samples/2025_lw_the-field-of-ai-alignment-a...</td>\n",
" <td>-288.831029</td>\n",
" <td>-57.706054</td>\n",
" <td>0.925483</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>../samples/2025_lw_the-intelligence-curse.md</td>\n",
" <td>-265.925183</td>\n",
" <td>-54.846207</td>\n",
" <td>0.688044</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>../samples/2025_lw_my-agi-safety-research-2024...</td>\n",
" <td>-261.054373</td>\n",
" <td>0.756925</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>../samples/2025_lw_comment-on-death-and-the-go...</td>\n",
" <td>-239.929921</td>\n",
" <td>0.750253</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>../samples/2025_lw_preference-inversion.md</td>\n",
" <td>-238.890490</td>\n",
" <td>-52.294728</td>\n",
" <td>0.633321</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>../samples/2025_lw_debating-buying-nvda-in-201...</td>\n",
" <td>-229.822011</td>\n",
" <td>0.526975</td>\n",
" <th>19</th>\n",
" <td>../samples/2025_lw_my-agi-safety-research-2024...</td>\n",
" <td>-49.874378</td>\n",
" <td>0.756925</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>../samples/2025_lw_review-planecrash.md</td>\n",
" <td>-49.581118</td>\n",
" <td>0.933950</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>../samples/2025_lw_the-subset-parity-learning-...</td>\n",
" <td>-224.939272</td>\n",
" <td>-47.673055</td>\n",
" <td>0.697946</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>../samples/2024_lesswrong_slop.md</td>\n",
" <td>-201.960423</td>\n",
" <td>0.100000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>../samples/2025_lw_human-study-on-ai-spear-phi...</td>\n",
" <td>-200.020488</td>\n",
" <td>0.677458</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>../samples/2024_openai_emails.md</td>\n",
" <td>-194.183420</td>\n",
" <td>0.700000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>../samples/2024_news_anthropic.md</td>\n",
" <td>-176.985800</td>\n",
" <td>0.500000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>../samples/2024_lw_by-default-capital-will-mat...</td>\n",
" <td>-169.866388</td>\n",
" <td>0.906388</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>../samples/2024_deliberative_alignment.md</td>\n",
" <td>-159.913571</td>\n",
" <td>0.600000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>../samples/2025_h5n1_report.md</td>\n",
" <td>-156.453212</td>\n",
" <td>0.750000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>../samples/2024_anthropic_palintir.md</td>\n",
" <td>-143.663636</td>\n",
" <td>0.200000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>../samples/2024_gwern_reddit.md</td>\n",
" <td>-142.434464</td>\n",
" <td>1.000000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>../samples/2024_lw_the-plan-2024-update.md</td>\n",
" <td>-136.823458</td>\n",
" <td>0.785170</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>../samples/2024_how_to_focus.md</td>\n",
" <td>-135.387340</td>\n",
" <td>0.500000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>../samples/2024_trump_appointment.md</td>\n",
" <td>-128.444646</td>\n",
" <td>-45.133784</td>\n",
" <td>0.300000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>../samples/lorem_ipsum.md</td>\n",
" <td>-127.469837</td>\n",
" <td>0.000000</td>\n",
" <td>True</td>\n",
" <th>8</th>\n",
" <td>../samples/2024_lesswrong_slop.md</td>\n",
" <td>-44.930269</td>\n",
" <td>0.100000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>../samples/2024_bob_fanfic2.md</td>\n",
" <td>-125.559481</td>\n",
" <td>0.400000</td>\n",
" <td>True</td>\n",
" <th>17</th>\n",
" <td>../samples/2025_lw_debating-buying-nvda-in-201...</td>\n",
" <td>-44.381423</td>\n",
" <td>0.526975</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>../samples/2025_h5n1_report.md</td>\n",
" <td>-44.069677</td>\n",
" <td>0.750000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>../samples/2024_lw_by-default-capital-will-mat...</td>\n",
" <td>-43.184356</td>\n",
" <td>0.906388</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>../samples/2025_lw_human-study-on-ai-spear-phi...</td>\n",
" <td>-41.879381</td>\n",
" <td>0.677458</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>../samples/2024_news_anthropic.md</td>\n",
" <td>-41.733471</td>\n",
" <td>0.500000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>../samples/2024_how_to_focus.md</td>\n",
" <td>-38.998412</td>\n",
" <td>0.500000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>../samples/2025_lw_comment-on-death-and-the-go...</td>\n",
" <td>-38.898521</td>\n",
" <td>0.750253</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>../samples/2024_openai_emails.md</td>\n",
" <td>-37.535013</td>\n",
" <td>0.700000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>../samples/2025_lw_2024-in-ai-predictions.md</td>\n",
" <td>-37.078135</td>\n",
" <td>0.789190</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>../samples/2024_gpt4_fake_paper.md</td>\n",
" <td>-122.651137</td>\n",
" <td>-35.067661</td>\n",
" <td>0.000000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>../samples/2024_deliberative_alignment.md</td>\n",
" <td>-34.208421</td>\n",
" <td>0.600000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>../samples/2024_anthropic_palintir.md</td>\n",
" <td>-31.636323</td>\n",
" <td>0.200000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>../samples/politics_is_the_mind_killer.md</td>\n",
" <td>-116.203043</td>\n",
" <td>-26.009715</td>\n",
" <td>0.500000</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>../samples/lorem_ipsum.md</td>\n",
" <td>-25.975428</td>\n",
" <td>0.000000</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>../samples/2025_lw_the-laws-of-large-numbers.md</td>\n",
" <td>-107.898850</td>\n",
" <td>-25.415931</td>\n",
" <td>0.540932</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>../samples/2024_bob_fanfic.md</td>\n",
" <td>-106.220317</td>\n",
" <td>0.300000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>../samples/2025_lw_what-s-the-short-timeline-p...</td>\n",
" <td>-102.163854</td>\n",
" <td>0.898161</td>\n",
" <th>6</th>\n",
" <td>../samples/2024_gwern_reddit.md</td>\n",
" <td>-25.314383</td>\n",
" <td>1.000000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>../samples/2025_lw_parkinson-s-law-and-the-ide...</td>\n",
" <td>-97.869906</td>\n",
" <td>-21.706555</td>\n",
" <td>0.677458</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>../samples/2024_lw_the-plan-2024-update.md</td>\n",
" <td>-20.314768</td>\n",
" <td>0.785170</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>../samples/2024_bob_fanfic2.md</td>\n",
" <td>-20.237010</td>\n",
" <td>0.400000</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>../samples/2024_bob_fanfic.md</td>\n",
" <td>-20.153819</td>\n",
" <td>0.300000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>../samples/2024_arxiv_meh.md</td>\n",
" <td>-73.838489</td>\n",
" <td>-19.115730</td>\n",
" <td>0.150000</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>../samples/2025_lw_what-s-the-short-timeline-p...</td>\n",
" <td>-16.388113</td>\n",
" <td>0.898161</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" f train/diff_sum \\\n",
"22 ../samples/2025_lw_review-planecrash.md -339.481621 \n",
"15 ../samples/2025_lw_2024-in-ai-predictions.md -297.344904 \n",
"23 ../samples/2025_lw_the-field-of-ai-alignment-a... -288.831029 \n",
"24 ../samples/2025_lw_the-intelligence-curse.md -265.925183 \n",
"19 ../samples/2025_lw_my-agi-safety-research-2024... -261.054373 \n",
"16 ../samples/2025_lw_comment-on-death-and-the-go... -239.929921 \n",
"21 ../samples/2025_lw_preference-inversion.md -238.890490 \n",
"17 ../samples/2025_lw_debating-buying-nvda-in-201... -229.822011 \n",
"26 ../samples/2025_lw_the-subset-parity-learning-... -224.939272 \n",
"8 ../samples/2024_lesswrong_slop.md -201.960423 \n",
"18 ../samples/2025_lw_human-study-on-ai-spear-phi... -200.020488 \n",
"12 ../samples/2024_openai_emails.md -194.183420 \n",
"11 ../samples/2024_news_anthropic.md -176.985800 \n",
"9 ../samples/2024_lw_by-default-capital-will-mat... -169.866388 \n",
"4 ../samples/2024_deliberative_alignment.md -159.913571 \n",
"14 ../samples/2025_h5n1_report.md -156.453212 \n",
"0 ../samples/2024_anthropic_palintir.md -143.663636 \n",
"6 ../samples/2024_gwern_reddit.md -142.434464 \n",
"10 ../samples/2024_lw_the-plan-2024-update.md -136.823458 \n",
"7 ../samples/2024_how_to_focus.md -135.387340 \n",
"13 ../samples/2024_trump_appointment.md -128.444646 \n",
"28 ../samples/lorem_ipsum.md -127.469837 \n",
"3 ../samples/2024_bob_fanfic2.md -125.559481 \n",
"5 ../samples/2024_gpt4_fake_paper.md -122.651137 \n",
"29 ../samples/politics_is_the_mind_killer.md -116.203043 \n",
"25 ../samples/2025_lw_the-laws-of-large-numbers.md -107.898850 \n",
"2 ../samples/2024_bob_fanfic.md -106.220317 \n",
"27 ../samples/2025_lw_what-s-the-short-timeline-p... -102.163854 \n",
"20 ../samples/2025_lw_parkinson-s-law-and-the-ide... -97.869906 \n",
"1 ../samples/2024_arxiv_meh.md -73.838489 \n",
" f train/diff%_sum \\\n",
"23 ../samples/2025_lw_the-field-of-ai-alignment-a... -57.706054 \n",
"24 ../samples/2025_lw_the-intelligence-curse.md -54.846207 \n",
"21 ../samples/2025_lw_preference-inversion.md -52.294728 \n",
"19 ../samples/2025_lw_my-agi-safety-research-2024... -49.874378 \n",
"22 ../samples/2025_lw_review-planecrash.md -49.581118 \n",
"26 ../samples/2025_lw_the-subset-parity-learning-... -47.673055 \n",
"13 ../samples/2024_trump_appointment.md -45.133784 \n",
"8 ../samples/2024_lesswrong_slop.md -44.930269 \n",
"17 ../samples/2025_lw_debating-buying-nvda-in-201... -44.381423 \n",
"14 ../samples/2025_h5n1_report.md -44.069677 \n",
"9 ../samples/2024_lw_by-default-capital-will-mat... -43.184356 \n",
"18 ../samples/2025_lw_human-study-on-ai-spear-phi... -41.879381 \n",
"11 ../samples/2024_news_anthropic.md -41.733471 \n",
"7 ../samples/2024_how_to_focus.md -38.998412 \n",
"16 ../samples/2025_lw_comment-on-death-and-the-go... -38.898521 \n",
"12 ../samples/2024_openai_emails.md -37.535013 \n",
"15 ../samples/2025_lw_2024-in-ai-predictions.md -37.078135 \n",
"5 ../samples/2024_gpt4_fake_paper.md -35.067661 \n",
"4 ../samples/2024_deliberative_alignment.md -34.208421 \n",
"0 ../samples/2024_anthropic_palintir.md -31.636323 \n",
"29 ../samples/politics_is_the_mind_killer.md -26.009715 \n",
"28 ../samples/lorem_ipsum.md -25.975428 \n",
"25 ../samples/2025_lw_the-laws-of-large-numbers.md -25.415931 \n",
"6 ../samples/2024_gwern_reddit.md -25.314383 \n",
"20 ../samples/2025_lw_parkinson-s-law-and-the-ide... -21.706555 \n",
"10 ../samples/2024_lw_the-plan-2024-update.md -20.314768 \n",
"3 ../samples/2024_bob_fanfic2.md -20.237010 \n",
"2 ../samples/2024_bob_fanfic.md -20.153819 \n",
"1 ../samples/2024_arxiv_meh.md -19.115730 \n",
"27 ../samples/2025_lw_what-s-the-short-timeline-p... -16.388113 \n",
"\n",
" novelty in_training \n",
"22 0.933950 False \n",
"15 0.789190 False \n",
"23 0.925483 False \n",
"24 0.688044 False \n",
"19 0.756925 False \n",
"16 0.750253 False \n",
"21 0.633321 False \n",
"17 0.526975 False \n",
"19 0.756925 False \n",
"22 0.933950 False \n",
"26 0.697946 False \n",
"8 0.100000 False \n",
"18 0.677458 False \n",
"12 0.700000 False \n",
"11 0.500000 False \n",
"9 0.906388 False \n",
"4 0.600000 False \n",
"14 0.750000 False \n",
"0 0.200000 False \n",
"6 1.000000 False \n",
"10 0.785170 False \n",
"7 0.500000 False \n",
"13 0.300000 False \n",
"28 0.000000 True \n",
"3 0.400000 True \n",
"8 0.100000 False \n",
"17 0.526975 False \n",
"14 0.750000 False \n",
"9 0.906388 False \n",
"18 0.677458 False \n",
"11 0.500000 False \n",
"7 0.500000 False \n",
"16 0.750253 False \n",
"12 0.700000 False \n",
"15 0.789190 False \n",
"5 0.000000 False \n",
"4 0.600000 False \n",
"0 0.200000 False \n",
"29 0.500000 True \n",
"28 0.000000 True \n",
"25 0.540932 False \n",
"2 0.300000 False \n",
"27 0.898161 False \n",
"6 1.000000 False \n",
"20 0.677458 False \n",
"1 0.150000 False "
"10 0.785170 False \n",
"3 0.400000 True \n",
"2 0.300000 False \n",
"1 0.150000 False \n",
"27 0.898161 False "
]
},
"execution_count": 86,
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"main_metric = 'train/diff_sum'\n",
"main_metric = 'train/diff%_sum'\n",
"df_res[['f', main_metric, 'novelty', 'in_training']].sort_values( main_metric) "
]
},
{
"cell_type": "code",
"execution_count": 87,
"execution_count": 94,
"metadata": {},
"outputs": [
{
@@ -3307,7 +3308,7 @@
"[30 rows x 24 columns]"
]
},
"execution_count": 87,
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
@@ -3318,7 +3319,28 @@
},
{
"cell_type": "code",
"execution_count": 88,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'df_res' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf_res\u001b[49m\u001b[38;5;241m.\u001b[39mbefore\n",
"\u001b[0;31mNameError\u001b[0m: name 'df_res' is not defined"
]
}
],
"source": [
"df_res.before"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
@@ -3331,7 +3353,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(df_res.to_markdown())"
"# print(df_res.to_markdown())"
]
},
{
@@ -3458,7 +3480,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0rc1"
"version": "3.11.0"
}
},
"nbformat": 4,
File diff suppressed because one or more lines are too long
+51 -21
View File
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -215,25 +215,48 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fe4e30aeb2614c759ca799709a89f5c3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"0it [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loaded 9346 posts from cache\n"
"\u001b[32m2025-07-26 11:17:04.797\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mitems_list\u001b[0m:\u001b[36m110\u001b[0m - \u001b[1mStarting from {next_date}\u001b[0m\n",
"\u001b[32m2025-07-26 11:17:04.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mitems_list\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mFetching posts after 2023-01-01\u001b[0m\n",
"\u001b[32m2025-07-26 11:17:05.927\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mfetch_posts\u001b[0m:\u001b[36m98\u001b[0m - \u001b[31m\u001b[1mFailed to fetch posts: {\"errors\":[{\"message\":\"Expected value of type \\\"JSON\\\", found {excludeEvents: true, view: \\\"old\\\", af: False, limit: 50, karmaThreshold: -10000, after: \\\"2023-01-01\\\", filter: \\\"tagged\\\"}; JSON cannot represent value: False\",\"locations\":[{\"line\":4,\"column\":24}],\"extensions\":{\"code\":\"GRAPHQL_VALIDATION_FAILED\"}},{\"message\":\"Cannot query field \\\"allVotes\\\" on type \\\"Post\\\".\",\"locations\":[{\"line\":40,\"column\":21}],\"extensions\":{\"code\":\"GRAPHQL_VALIDATION_FAILED\"}}]}\n",
"\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"9346"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
"ename": "HTTPError",
"evalue": "400 Client Error: Bad Request for url: https://www.lesswrong.com/graphql",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mHTTPError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 8\u001b[39m\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 7\u001b[39m posts = []\n\u001b[32m----> \u001b[39m\u001b[32m8\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mpost\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtqdm\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgw\u001b[49m\u001b[43m.\u001b[49m\u001b[43mitems_list\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 9\u001b[39m \u001b[43m \u001b[49m\u001b[43mposts\u001b[49m\u001b[43m.\u001b[49m\u001b[43mappend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpost\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 11\u001b[39m cache_file.write_text(json.dumps(posts, indent=\u001b[32m2\u001b[39m))\n",
"\u001b[36mFile \u001b[39m\u001b[32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/notebook.py:250\u001b[39m, in \u001b[36mtqdm_notebook.__iter__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 248\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 249\u001b[39m it = \u001b[38;5;28msuper\u001b[39m().\u001b[34m__iter__\u001b[39m()\n\u001b[32m--> \u001b[39m\u001b[32m250\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mit\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 251\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# return super(tqdm...) will not catch exception\u001b[39;49;00m\n\u001b[32m 252\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\n\u001b[32m 253\u001b[39m \u001b[38;5;66;03m# NB: except ... [ as ...] breaks IPython async KeyboardInterrupt\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/std.py:1181\u001b[39m, in \u001b[36mtqdm.__iter__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1178\u001b[39m time = \u001b[38;5;28mself\u001b[39m._time\n\u001b[32m 1180\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1181\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43miterable\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 1182\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\n\u001b[32m 1183\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Update and possibly print the progressbar.\u001b[39;49;00m\n\u001b[32m 1184\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Note: does not call self.update(1) for speed optimisation.\u001b[39;49;00m\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 114\u001b[39m, in \u001b[36mGreaterWrong.items_list\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 112\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m next_date:\n\u001b[32m 113\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFetching posts after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnext_date\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m114\u001b[39m posts = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfetch_posts\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmake_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnext_date\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 115\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m posts[\u001b[33m\"\u001b[39m\u001b[33mresults\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 116\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 96\u001b[39m, in \u001b[36mGreaterWrong.fetch_posts\u001b[39m\u001b[34m(self, query)\u001b[39m\n\u001b[32m 87\u001b[39m res = requests.post(\n\u001b[32m 88\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.base_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/graphql\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 89\u001b[39m \u001b[38;5;66;03m# The GraphQL endpoint returns a 403 if the user agent isn't set... Makes sense, but is annoying\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 93\u001b[39m json={\u001b[33m\"\u001b[39m\u001b[33mquery\u001b[39m\u001b[33m\"\u001b[39m: query},\n\u001b[32m 94\u001b[39m )\n\u001b[32m 95\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m96\u001b[39m \u001b[43mres\u001b[49m\u001b[43m.\u001b[49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 97\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m requests.exceptions.HTTPError:\n\u001b[32m 98\u001b[39m logger.error(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mFailed to fetch posts: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mres.text\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/requests/models.py:1026\u001b[39m, in \u001b[36mResponse.raise_for_status\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 1021\u001b[39m http_error_msg = (\n\u001b[32m 1022\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.status_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m Server Error: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mreason\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.url\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1025\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response=\u001b[38;5;28mself\u001b[39m)\n",
"\u001b[31mHTTPError\u001b[39m: 400 Client Error: Bad Request for url: https://www.lesswrong.com/graphql"
]
}
],
"source": [
@@ -253,7 +276,7 @@
},
{
"cell_type": "code",
"execution_count": 109,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -293,14 +316,14 @@
"df.drop(columns=['emojiReactors'], inplace=True)\n",
"for col in ['postedAt', 'modifiedAt']:\n",
" df[col] = pd.to_datetime(df[col])\n",
"p_file = Path('output/01greaterwrong.json')\n",
"p_file = Path('output/01greaterwrong.parquet')\n",
"df.to_parquet(p_file)\n",
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 110,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -313,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": 111,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -425,9 +448,16 @@
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"novelty is baseScore normalised to [0, 1]"
]
},
{
"cell_type": "code",
"execution_count": 141,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -470,7 +500,7 @@
},
{
"cell_type": "code",
"execution_count": 147,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -567,7 +597,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.11.0rc1"
}
},
"nbformat": 4,
+1
View File
@@ -20,6 +20,7 @@ dependencies = [
"pandas>=2.3.1",
"peft>=0.16.0",
"python-frontmatter>=1.1.0",
"seaborn>=0.13.2",
"tqdm>=4.67.1",
"transformers>=4.54.0",
]
+132
View File
@@ -0,0 +1,132 @@
---
title: Predictable Numbers
url:
novelty: 0.5
date: 2100-01-01
---
0
2
2
4
6
10
16
26
42
68
10
78
88
66
54
20
74
94
68
62
30
92
22
14
36
50
86
36
22
58
80
38
18
56
74
30
4
34
38
72
10
82
92
74
66
40
6
46
52
98
50
48
98
46
44
90
34
24
58
82
40
22
62
84
46
30
76
6
82
88
70
58
28
86
14
0
14
14
28
42
70
12
82
94
76
70
46
16
62
78
40
18
58
76
34
10
44
54
98
52
These numbers were generated with the following code:
def fibonacci(n):
    """Return the n-th Fibonacci number, with fibonacci(0) == 0 and fibonacci(1) == 1.

    Args:
        n: Index into the Fibonacci sequence.

    Returns:
        The n-th Fibonacci number as an int, or None when n is negative
        (after printing "Incorrect input").
    """
    # Negative indices are not supported: report it and hand back None explicitly.
    if n < 0:
        print("Incorrect input")
        return None
    if n == 0:
        return 0
    a, b = 0, 1
    # For n == 1 the range is empty, so b == 1 is returned unchanged.
    for _ in range(1, n):
        a, b = b, a + b
    return b
print(np.array([fibonacci(i) for i in range(100)]) * 2 % 100)
+106
View File
@@ -0,0 +1,106 @@
---
title: Random Numbers
url:
novelty: 0.0
date: 2100-01-01
---
95
32
48
57
96
21
69
1
8
95
1
91
95
52
94
33
70
26
62
17
10
31
82
68
48
77
98
89
26
10
98
72
93
15
18
77
71
26
16
58
48
23
69
58
45
36
58
31
45
91
79
94
94
64
1
88
38
91
52
57
13
78
13
13
42
55
27
93
69
36
43
17
21
24
51
65
59
51
86
89
32
33
65
30
68
37
54
55
95
53
16
99
9
4
8
25
15
96
31
87
Generated
+16
View File
@@ -231,6 +231,7 @@ dependencies = [
{ name = "pandas" },
{ name = "peft" },
{ name = "python-frontmatter" },
{ name = "seaborn" },
{ name = "tqdm" },
{ name = "transformers" },
]
@@ -255,6 +256,7 @@ requires-dist = [
{ name = "pandas", specifier = ">=2.3.1" },
{ name = "peft", specifier = ">=0.16.0" },
{ name = "python-frontmatter", specifier = ">=1.1.0" },
{ name = "seaborn", specifier = ">=0.13.2" },
{ name = "tqdm", specifier = ">=4.67.1" },
{ name = "transformers", specifier = ">=4.54.0" },
]
@@ -2179,6 +2181,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878, upload-time = "2025-02-26T09:15:14.99Z" },
]
[[package]]
name = "seaborn"
version = "0.13.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "matplotlib" },
{ name = "numpy" },
{ name = "pandas" },
]
sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696, upload-time = "2024-01-25T13:21:52.551Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
]
[[package]]
name = "sentencepiece"
version = "0.2.0"