mirror of
https://github.com/wassname/detect_bs_text.git
synced 2026-06-27 16:30:17 +08:00
init
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
.env
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
# Experiment using LLMs to detect BS writing
|
||||
|
||||
|
||||
An experiment to see if I can detect "BS" using LLMs in a robust way.
|
||||
|
||||
It's impossible to detect AI generated text with 100% accuracy. But we will have more success detecting bad, empty, or contentless text.
|
||||
|
||||
Why might this work? As with [Schmidhuber's definition of curiosity](https://arxiv.org/abs/0812.4360), good writing should initially surprise the reader but become less surprising as they learn about it. Empty writing is not surprising. And confusing writing stays confusing even after you have read it. In other words, good writing should have a high perplexity, which goes down after learning it.
|
||||
|
||||
The ideal way to do this would be by fine-tuning. But that would be memory intensive, so I will try to do it with prompts. I ask:
|
||||
- Is the text surprising? (high perplexity)
|
||||
- Is it less surprising when given a summary? (low perplexity)
|
||||
|
||||
If yes/yes, then it may be surprising new information. If either is not true, then it's probably BS.
|
||||
|
||||
See main.ipynb
|
||||
+334
@@ -0,0 +1,334 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/media/wassname/SGIronWolf/projects5/bs_writing_detector/.venv/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from evaluate import load\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"samples = [\n",
|
||||
" {\"name\": \"bad_ml\", \"url\":\"https://arxiv.org/abs/2312.10868\", \"text\": \"\"\"This roadmap survey has embarked on an exploration of the\n",
|
||||
"transformative trends in generative AI research, particularly focusing on speculated advancements like Q* and the progressive strides towards AGI. Our analysis highlights a crucial paradigm shift, driven by innovations such as MoE, multi-modal learning, and the pursuit of AGI. These advancements signal a future where AI systems could significantly extend their capabilities in reasoning, contextual understanding, and creative problem-solving. This study reflects on AI’s dual potential to either contribute to or impede global equity and justice. The equitable distribution of AI benefits and its role in decision-making processes raise crucial questions about fairness and inclusivity. It is imperative to thoughtfully integrate AI into societal structures to enhance justice and reduce disparities. Despite these advancements, several open questions and research gaps remain. These include ensuring the ethical alignment of advanced AI systems with human values and societal norms, a challenge compounded by their increasing autonomy. The safety and robustness of AGI systems in diverse environments also remain a significant research gap. Addressing these challenges requires a multidisciplinary approach, incorporating ethical, social, and philosophical perspectives. Our survey has highlighted key areas for future inter-disciplinary research in AI, emphasizing the integration of ethical, sociological, and technical perspectives. This approach will foster collaborative research, bridging the gap between technological advancement and societal needs, ensuring that AI development is aligned with human values and global welfare. The roles of MoE, multimodal, and AGI in reshaping generative AI have been identified as significant, as their advancements can enhance model performance and versatility, and pave the way for future research in areas like ethical AI alignment and AGI. 
As we forge ahead, the balance between AI advancements and human creativity is not just a goal but a necessity, ensuring AI’s role as a complementary force that amplifies our capacity to innovate and solve complex challenges. Our responsibility is to guide these advancements towards enriching the human experience, aligning technological progress with ethical standards and societal well-being. \"\"\"},\n",
|
||||
" {\"name\": \"good_ml\", \"url\":\"https://arxiv.org/abs/2310.01405\", \"text\": \"\"\"We explored representation engineering (RepE), an approach to top-down transparency for AI systems. Inspired by the Hopfieldian view in cognitive neuroscience, RepE places representations and the transformations between them at the center of analysis. As neural networks exhibit more coherent internal structures, we believe analyzing them at the representation level can yield new insights, aiding in effective monitoring and control. Taking early steps in this direction, we proposed new RepE methods, which obtained state-of-the-art on TruthfulQA, and we demonstrated how RepE and can provide traction on a wide variety of safety-relevant problems. While we mainly analyzed subspaces of representations, future work could investigate trajectories, manifolds, and state-spaces of representations. We hope this initial step in exploring the potential of RepE helps to foster new insights into understanding and controlling AI systems, ultimately ensuring that future AI systems are trustworthy and safe.\"\"\"},\n",
|
||||
"\n",
|
||||
" {\"name\": \"sokal hoax\", \"url\":\"www.physics.nyu.edu/faculty/sokal/transgress_v2/transgress_v2_singlefile.html\", \"text\": \"\"\" There are many natural scientists, and especially physicists, who continue to reject the notion that the disciplines concerned with social and cultural criticism can have anything to contribute, except perhaps peripherally, to their research. Still less are they receptive to the idea that the very foundations of their worldview must be revised or rebuilt in the light of such criticism. Rather, they cling to the dogma imposed by the long post-Enlightenment hegemony over the Western intellectual outlook, which can be summarized briefly as follows: that there exists an external world, whose properties are independent of any individual human being and indeed of humanity as a whole; that these properties are encoded in ``eternal'' physical laws; and that human beings can obtain reliable, albeit imperfect and tentative, knowledge of these laws by hewing to the ``objective'' procedures and epistemological strictures prescribed by the (so-called) scientific method.\n",
|
||||
"\n",
|
||||
" But deep conceptual shifts within twentieth-century science have undermined this Cartesian-Newtonian metaphysics1; revisionist studies in the history and philosophy of science have cast further doubt on its credibility2; and, most recently, feminist and poststructuralist critiques have demystified the substantive content of mainstream Western scientific practice, revealing the ideology of domination concealed behind the façade of ``objectivity''.3 It has thus become increasingly apparent that physical ``reality'', no less than social ``reality'', is at bottom a social and linguistic construct; that scientific ``knowledge\", far from being objective, reflects and encodes the dominant ideologies and power relations of the culture that produced it; that the truth claims of science are inherently theory-laden and self-referential; and consequently, that the discourse of the scientific community, for all its undeniable value, cannot assert a privileged epistemological status with respect to counter-hegemonic narratives emanating from dissident or marginalized communities. These themes can be traced, despite some differences of emphasis, in Aronowitz's analysis of the cultural fabric that produced quantum mechanics4; in Ross' discussion of oppositional discourses in post-quantum science5; in Irigaray's and Hayles' exegeses of gender encoding in fluid mechanics6; and in Harding's comprehensive critique of the gender ideology underlying the natural sciences in general and physics in particular.7 \"\"\",},\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" {\"name\": \"einsteins theory of general relativity\", \"url\":\"\", \"text\": \"\"\"In recent years I have worked, in part together with my friend Grossman, on a [1] generalization of the theory of relativity. During these investigations, a kaleidoscopic mixture of postulates from physics and mathematics has been introduced and used as heuristical tools; as a consequence it is not easy to see through and characterize the theory from a formal mathematical point of view, that is, only based upon these papers. The primary objective of the present paper is to close this gap. In particular, it has been possible to obtain the equations of the gravitational field in a purely covariance-theoretical manner (section D). I also tried to give simple derivations of the basic laws of absolute differential calculus-in part, they are probably new ones (section B)-in order to allow the reader to get a complete grasp of the theory without having to read other, purely mathematical tracts. As an illustration of the mathematical methods, I derived the (Eulerian) equations of hydrodynamics and the field equations of the electrodynamics of moving bodies (section C). Section E shows that Newton's theory of gravitation follows from the general theory as an approximation. The most elementary features of the present theory are also derived inasfar as [2] they are characteristic of a Newtonian (static) gravitational field (curvature of light rays, shift of spectral \"\"\",},\n",
|
||||
"\n",
|
||||
" {\"name\": \"lorem ipsum \", \"url\":\"\", \"text\": \"\"\"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\"\"\"},\n",
|
||||
"\n",
|
||||
" {\"name\": \"wikipedia on LK-99\", \"url\":\"https://en.wikipedia.org/wiki/LK-99\", \"text\": \"\"\"Some small LK-99 samples were reported to show strong diamagnetic properties, including a response confusingly[23] referred to as \"partial levitation\" over a magnet.[18] This was misinterpreted by some as a sign of superconductivity, although it is a sign of regular diamagnetism or ferromagnetism.\n",
|
||||
"\n",
|
||||
"While initial preprints claimed the material was a room-temperature superconductor,[18]: 1 they did not report observing any definitive features of superconductivity, such as zero resistance, the Meissner effect, flux pinning, AC magnetic susceptibility, the Josephson effect, a temperature-dependent critical field and current, or a sudden jump in specific heat around the critical temperature.[24]\n",
|
||||
"\n",
|
||||
"As it is common for a new material to spuriously seem like a potential candidate for high-temperature superconductivity,[13] thorough experimental reports normally demonstrate a number of these expected properties. As of 15 October 2023, not one of these properties had been observed by the original experiment or any replications.[25] \"\"\",},\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"{\"name\": \"I have a dream\", \"url\":\"\", \"text\": \"\"\"So even though we face the difficulties of today and tomorrow, I still have a dream. It is a dream deeply rooted in the American dream. I have a dream that one day this nation will rise up and live out the true meaning of its creed: We hold these truths to be self-evident, that all men are created equal.\n",
|
||||
"\n",
|
||||
"People clap and sing along to a freedom song between speeches at the March on Washington for Jobs and Freedom in 1963.\n",
|
||||
"Express Newspapers via Getty Images\n",
|
||||
"\n",
|
||||
"I have a dream that one day on the red hills of Georgia, the sons of former slaves and the sons of former slave owners will be able to sit down together at the table of brotherhood.\n",
|
||||
"\n",
|
||||
"I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice, sweltering with the heat of oppression will be transformed into an oasis of freedom and justice.\"\"\",},\n",
|
||||
"{\"name\": \"AI gen fake paper\", \"url\":\"\", \"text\": \"\"\"\n",
|
||||
"Machine Learning (ML) as a field has largely embraced binary constructs as foundational to its functioning, with 0/1 and true/false distinctions underpinning many of its core algorithms. This paper argues that these binary frameworks are not merely technical conveniences but are, in fact, reflective of deeper hegemonic paradigms that perpetuate exclusionary practices and systemic biases. By uncritically adopting these binaries, ML inadvertently reinforces a worldview that marginalizes complex, nuanced identities and experiences.\n",
|
||||
"\n",
|
||||
"We propose a critical examination of these binaries, questioning the necessity and ubiquity of dualistic thinking within ML. We suggest that the field's reliance on binary classification not only limits its predictive accuracy in certain contexts but also fails to capture the rich, fluid nature of human experiences and societal structures. Instead, we advocate for a \"fluid\" approach to algorithms that allows for more nuanced and inclusive representations of reality, rejecting the oversimplified and often exclusionary nature of strict binary outcomes.\n",
|
||||
"\n",
|
||||
"Furthermore, this paper argues for the integration of intersectional data that reflects the diverse and overlapping categories of identity, including race, gender, class, and more. Current ML models often overlook these complexities, leading to outcomes that fail to serve, and even harm, underrepresented populations. We critique the prevailing notion that ML is a neutral, objective tool, highlighting the lack of socio-political context in algorithmic decision-making processes.\n",
|
||||
"\n",
|
||||
"In conclusion, we call for an epistemological shift in the field of Machine Learning. This shift involves moving away from a purely positivistic, binary approach towards one that is reflective, inclusive, and aware of the social dimensions of technology. By reimagining the foundational paradigms of ML, we can work towards a more equitable and nuanced understanding of the world, one that respects and represents the full spectrum of human experience.\n",
|
||||
"\"\"\",},\n",
|
||||
"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"model_names = [\n",
|
||||
" # \"EleutherAI/pythia-70m-v0\",\n",
|
||||
" \"EleutherAI/pythia-160m\",\n",
|
||||
" # \"EleutherAI/pythia-410m-v0\",\n",
|
||||
" # \"EleutherAI/pythia-1b-v0\",\n",
|
||||
" \"EleutherAI/pythia-2.8b\",\n",
|
||||
" # \"EleutherAI/pythia-6.9b-v0\",\n",
|
||||
" \"EleutherAI/pythia-12b\",\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"What's the best way to measure the information content of text? I don't care about style or jargon or acryonyms.\n",
|
||||
"\n",
|
||||
"One simple way is: perplexity_large_model - perplexity_small_model. This measures if having a bigger brain helps understand/write this text. How would that work if\n",
|
||||
"\n",
|
||||
"- new jargon?\n",
|
||||
"- new results?\n",
|
||||
"- new writing style?\n",
|
||||
"- math, acronyms\n",
|
||||
"- simple language\n",
|
||||
"- incoherent\n",
|
||||
"- vauge language?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'tl;dr: Representation engineering (RepE) is a transparent approach for analyzing AI systems. By focusing on representations and their transformations, RepE can provide new insights and aid in monitoring and controlling AI systems. Initial RepE methods achieved state-of-the-art results on TruthfulQA, showing promise for safety-related problems. Future work can explore different aspects of representations. RepE has the potential to improve understanding and trustworthiness of AI systems.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from openai import OpenAI\n",
|
||||
"\n",
|
||||
"def summize(text):\n",
|
||||
" client = OpenAI()\n",
|
||||
" content = f\"Make a tl;dr of this text in <280 chars.\\n\\n## Text\\n\\n{text}\\n\\n## Instruction\\n\\nMake a tl;dr of this text in <280 chars. Start with the most important, as extra text will be discarded :\\n\\ntl;dr:\"\n",
|
||||
" chat_completion = client.chat.completions.create(\n",
|
||||
" messages=[\n",
|
||||
" {\n",
|
||||
" \"role\": \"user\",\n",
|
||||
" \"content\": content,\n",
|
||||
" }\n",
|
||||
" ],\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
" )\n",
|
||||
" # print(content)\n",
|
||||
" r = chat_completion.choices[0].message.content\n",
|
||||
" return r\n",
|
||||
"\n",
|
||||
"r = summize(samples[1][\"text\"])\n",
|
||||
"r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"490"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(r)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from evaluate import evaluator\n",
|
||||
"from datasets import Dataset\n",
|
||||
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
|
||||
"import torch\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"perplexity = load(\"perplexity\", module_type=\"metric\")\n",
|
||||
"data = []\n",
|
||||
"\n",
|
||||
"summaries = {}\n",
|
||||
"for model_name in model_names:\n",
|
||||
" for sample in samples:\n",
|
||||
" if sample['name'] not in summaries:\n",
|
||||
" summaries[sample['name']] = summize(sample['text'])[:600]\n",
|
||||
" summary = summaries[sample['name']]\n",
|
||||
"\n",
|
||||
" # before \n",
|
||||
" s = sample['text']\n",
|
||||
" results = perplexity.compute(predictions=s, model_id=model_name, device='cuda')\n",
|
||||
" before = results['mean_perplexity']\n",
|
||||
"\n",
|
||||
" # after \n",
|
||||
" s = f\"\"\"\n",
|
||||
" High level summary: {summary}\n",
|
||||
"\n",
|
||||
"Text:\n",
|
||||
"{sample['text']}\n",
|
||||
" \"\"\"\n",
|
||||
" results = perplexity.compute(predictions=s, model_id=model_name, device='cuda')\n",
|
||||
" after = results['mean_perplexity']\n",
|
||||
"\n",
|
||||
" print(model_name, sample['name'], before, after)\n",
|
||||
" data.append(dict(before=before, after=after, model=model_name, sample=sample['name']))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# df = pd.DataFrame(data)\n",
|
||||
"# df\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"df = pd.DataFrame(data)\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df['improvement'] = df['before'] - df['after']\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Are smarter models less confused?\n",
|
||||
"# Does it confuse dumber models?\n",
|
||||
"df.groupby('sample').apply(lambda x: x['perplexity'].max()-x['perplexity'].min())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Are smarter models less confused?\n",
|
||||
"# Does it confuse dumber models?\n",
|
||||
"df.groupby('sample').apply(lambda x: x['perplexity'].max())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Are smarter models less confused?\n",
|
||||
"# Does it confuse dumber models?\n",
|
||||
"def content_rating(x):\n",
|
||||
" max = x['perplexity'].max()\n",
|
||||
" min = x['perplexity'].min()\n",
|
||||
" return (max-min)/max\n",
|
||||
"\n",
|
||||
"df.groupby('sample').apply(content_rating)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0rc1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Generated
+3379
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,40 @@
|
||||
[tool.poetry]
|
||||
name = "src"
|
||||
version = "0.1.0"
|
||||
description = "building a BS detector using perplexity"
|
||||
authors = ["wassname <git@wassname.org>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<3.13"
|
||||
torch = {version = "^2.1.0+cu118", source = "pytorch"}
|
||||
tqdm = "^4.66.1"
|
||||
datasets = "^2.14.5"
|
||||
transformers = "4.34.0"
|
||||
optimum = "^1.13.2"
|
||||
numpy = "^1.26.1"
|
||||
pandas = "^2.1.1"
|
||||
scikit-learn = "^1.3.1"
|
||||
bitsandbytes = "^0.41.3.post2"
|
||||
packaging = "^23.2"
|
||||
peft = "^0.7.1"
|
||||
evaluate = "^0.4.1"
|
||||
auto-gptq = "^0.6.0"
|
||||
openai = "^1.6.1"
|
||||
python-dotenv = "^1.0.0"
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "pytorch"
|
||||
url = "https://download.pytorch.org/whl/cu118"
|
||||
priority = "explicit"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
ipykernel = "^6.25.2"
|
||||
black = "^23.10.0"
|
||||
pylama = "^8.4.1"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
Reference in New Issue
Block a user