mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
Cleanup notebooks folder (#716)
- standardize file naming to be a little more pythonic. - one folder per notebook plus a README for easy GH viewing. - adjust colab badges for new structure.
This commit is contained in:
@@ -1,21 +0,0 @@
|
||||
# Generate Topics, Questions, and Answers from a text
|
||||
|
||||
This python code can be used to generate topics, questions, and answers from a
|
||||
paragraph of text. This is a good way to generate ground truth knowledge about a
|
||||
topic from a trusted source.
|
||||
|
||||
The output of this is a dictionary with:
|
||||
|
||||
1. submitted paragraph
|
||||
1. generated topics
|
||||
1. generated questions
|
||||
1. generated topic prefixes that can be prepended to the questions
|
||||
1. open book answer based only on the provided paragraph
|
||||
1. closed book answers generated by FLAN-T5-11B
|
||||
|
||||
## Contributing
|
||||
|
||||
This code is verified to work on a 24GB vram graphics card (like an RTX3090). We
|
||||
are working on getting it to run on Google Colab TPUs, and it may also be
|
||||
possible to use smaller T5 models like the 3 billion parameter model and still
|
||||
get acceptable results.
|
||||
@@ -1,406 +0,0 @@
|
||||
# This notebook will run on a system with a single RTX3090 (24 GB vram).
|
||||
# You need to install accelerate, bitsandbytes, and transformers
|
||||
|
||||
import math
import pickle
import re
import time

import torch

# load all needed libraries
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
||||
|
||||
# This device map places every listed module of the model on GPU 0.
# It uses nearly all the memory of a 24GB card, so the GPU needs >= 24GB vram.
device_map_T5_13B = {
    module_name: 0
    for module_name in (
        ["shared", "decoder.embed_tokens", "encoder.embed_tokens"]
        + [
            "{}.{}".format(stack, part)
            for stack in ("encoder", "decoder")
            # Each stack lists blocks 0..23 followed by its final layer norm
            # and dropout modules.
            for part in ["block.{}".format(i) for i in range(24)]
            + ["final_layer_norm", "dropout"]
        ]
        + ["lm_head"]
    )
}
|
||||
|
||||
|
||||
# Load the model in bfloat16. Make sure to use bfloat16
# if you are doing inference with 16bit precision.
# NOTE(review): "flan-t5-xxl" has no organisation prefix, so this presumably
# expects a local checkpoint directory with that name — confirm.
tokenizer = AutoTokenizer.from_pretrained("flan-t5-xxl")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "flan-t5-xxl",
    # Pin the modules listed in the device map to the GPU chosen there.
    device_map=device_map_T5_13B,
    torch_dtype=torch.bfloat16,
    # 8-bit loading is explicitly disabled; weights stay in bfloat16.
    load_in_8bit=False,
)
|
||||
|
||||
|
||||
# Load strings as knowledge sources for QA generation.
# You can do this with a pickle.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only point this at a paragraphs.pkl file you created or fully trust.
objects = []
with open("paragraphs.pkl", "rb") as openfile:
    # The file may hold several pickled objects back to back; keep reading
    # until the stream is exhausted.
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break
if not objects:
    # Fail with a clear message instead of an IndexError on objects[0].
    raise ValueError("paragraphs.pkl does not contain any pickled objects")
# Only the first pickled object is used as the collection of paragraphs.
paragraphs = objects[0]
|
||||
|
||||
# Make sure no paragraphs are too long for T5.
# It handles up to 512 tokens context length. The limit is applied here as a
# character count (paragraphs longer than 1100 characters are dropped), not
# by tokenizing each paragraph.
fixed_paragraphs = [text for text in paragraphs if len(text) <= 1100]
print("Original number of paragraphs:", len(paragraphs))
print("Length filtered number of paragraphs:", len(fixed_paragraphs))
paragraphs = fixed_paragraphs
|
||||
|
||||
|
||||
# Sort_Tuple sorts a list of tuples in place by the second element,
# highest value first, and returns that same list.
def Sort_Tuple(tup):
    """Sort ``tup`` in place, descending by each item's second element.

    Args:
        tup: A list of indexable items (e.g. ``(text, score)`` tuples).

    Returns:
        The same list object, now ordered from highest to lowest second
        element. Items with equal second elements keep their relative order.
    """
    tup.sort(key=lambda item: item[1], reverse=True)
    return tup
|
||||
|
||||
|
||||
# _generate_and_score runs one FLAN_T5 generation and scores the result.
# It holds the logic shared by ask_flan_T5 and ask_flan_T5D.
def _generate_and_score(input_text, **generate_kwargs):
    """Generate a response for ``input_text`` and compute its score.

    Args:
        input_text: Prompt string to encode and send to the model.
        **generate_kwargs: Extra keyword arguments forwarded to
            ``model.generate`` (the sampling configuration).

    Returns:
        A tuple ``(out_text, score)`` where ``out_text`` is the decoded
        generation and ``score`` is the sum of the log-probabilities of the
        generated tokens, rounded to two decimals.
    """
    inputs = tokenizer.encode(input_text, return_tensors="pt").cuda(0)
    outputs = model.generate(
        inputs,
        eos_token_id=1,
        max_new_tokens=50,
        bos_token_id=0,
        return_dict_in_generate=True,
        output_scores=True,
        **generate_kwargs
    )
    # A single prompt is submitted, so only the first sequence is scored.
    sequence = outputs.sequences[0]
    out_text = tokenizer.decode(sequence, skip_special_tokens=True)
    probs = torch.stack(outputs.scores, dim=1).softmax(-1)
    logprobs = 0
    # The first token of the sequence is skipped: scores are indexed from the
    # second token onwards.
    for step, token in enumerate(sequence[1:]):
        # Probabilities are rounded to two decimals and offset by 0.001 so
        # that math.log never receives zero.
        word_prob = round(probs[0][step][token.item()].item(), 2) + 0.001
        logprobs = logprobs + math.log(word_prob)
    return (out_text, round(logprobs, 2))


# ask_flan_T5 takes a text input and returns the
# sampled response of FLAN_T5 and a log-probability
# score for the generation.
def ask_flan_T5(input_text):
    """Return ``(text, score)`` for a sampled generation.

    Sampling uses ``do_sample=True`` with ``top_p=0.95`` and
    ``temperature=0.9``, so repeated calls may return different results.
    """
    return _generate_and_score(
        input_text,
        do_sample=True,
        top_p=0.95,
        temperature=0.9,
    )


# ask_flan_T5D is a function that takes an input text and
# returns the deterministic(do_sample=False) output of
# FLAN_T5 and a log-probability score for the generation.
def ask_flan_T5D(input_text):
    """Return ``(text, score)`` for a generation with ``do_sample=False``."""
    return _generate_and_score(input_text, do_sample=False)
|
||||
|
||||
|
||||
# Generate candidate topics for a paragraph of text.
def generate_topic(paragraph):
    """Sample topics for ``paragraph`` and return the best-scoring ones.

    The model is sampled 20 times; results scoring above -4 are kept
    (duplicates collapse because a set is used). If fewer than three distinct
    results remain, two generic fallback entries are added.

    Returns:
        A list of at most five ``(text, score)`` tuples, highest score first.
    """
    prompt = (
        "Task: Create a topic classifier for the provided "
        "paragraph.\nParagraph:\n" + paragraph + "\nTopic: "
    )
    candidates = set()
    for _ in range(20):
        candidate = ask_flan_T5(prompt)
        score = candidate[1]
        if score > -4:
            candidates.add(candidate)
    if len(candidates) < 3:
        # Too few confident topics: pad with generic conversational openers.
        candidates.update(
            [("I was wondering", -3.3), ("I have a question", -3.3)]
        )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return ranked[:5]
|
||||
|
||||
|
||||
# Generate prepositional-phrase prefixes for a collection of topics.
def generate_topic_prefix(topic_set):
    """Turn topics into prepositional phrases usable as question prefixes.

    Args:
        topic_set: Iterable of entries whose first element is the topic text.

    Returns:
        A list of at most five ``(text, score)`` tuples, highest score first.
        Each topic is sampled five times; duplicate results collapse.
    """
    prompt_head = (
        "Task: Create a prepositional phrase about the topic.\n"
        "Example 1\n Topic: climbing mount everest\nPrepositional "
        "Phrase: With regards to climbing mount everest,\nExample "
        "2\nTopic: United States Air Force\nPrepositional Phrase: "
        "On the topic of the United States Air Force,\n Example 3\nTopic: "
    )
    phrases = set()
    for entry in topic_set:
        prompt = prompt_head + entry[0] + "\nPrepositional Phrase: "
        for _ in range(5):
            phrases.add(ask_flan_T5(prompt))
    ranked = sorted(phrases, key=lambda pair: pair[1], reverse=True)
    return ranked[:5]
|
||||
|
||||
|
||||
# Generate What/Where/Why/How/Who questions from a paragraph.
# number_of_questions is an integer which indicates how
# many of each question type to try to generate.
def generate_questions(paragraph, number_of_questions):
    """Sample questions of each type for ``paragraph``.

    Prints a warning when the paragraph encodes to more than 480 tokens.

    Returns:
        A set of ``(question_type, (question_text, score))`` tuples. A sample
        is kept only if its text contains the question-type word.
    """
    token_count = len(tokenizer.encode(paragraph))
    if token_count > 480:
        print("Warning, the context length is too long.")
    generated = set()
    for qtype in ("What", "Where", "Why", "How", "Who"):
        prompt = "".join(
            (
                "Please generate a question that starts with '",
                qtype,
                "' based on the following paragraph.\nText:\n",
                paragraph,
                "\nQuestion:\n",
            )
        )
        for _ in range(number_of_questions):
            candidate = ask_flan_T5(prompt)
            if qtype not in candidate[0]:
                continue
            generated.add((qtype, candidate))
    return generated
|
||||
|
||||
|
||||
# Generate answers for a set of questions.
# Input is the paragraph of text and a set of questions where each question
# is a tuple generated from the generate_questions() function.
def generate_answers(paragraph, question_set):
    """Answer each question using only the paragraph.

    Args:
        paragraph: Source text the answers must come from.
        question_set: Iterable of ``(question_type, (question_text, score))``
            tuples.

    Returns:
        A set of ``(question_type, (question_text, score), (answer_text,
        score))`` tuples. Questions the model answers with "NA" are dropped.
    """
    possible_answers = set()
    for question in question_set:
        input_text = (
            "Please read the following paragraph and "
            "then answer the question using only data "
            "found in the text. If no answer is possible, respond "
            "'NA'.\nText:\n"
            + paragraph
            + "\nQuestion:\n"
            + question[1][0]
            + "\nAnswer:\n"
        )
        answer = ask_flan_T5D(input_text)
        # Match "NA" as a whole word only, so real answers that merely
        # contain those letters (e.g. "NASA", "DNA") are not discarded.
        if re.search(r"\bNA\b", answer[0]):
            continue
        possible_answers.add((question[0], question[1], answer))
    return possible_answers
|
||||
|
||||
|
||||
# Generate a new question for each answer found in a paragraph.
# Input is the paragraph of text and a set of question/answer tuples
# generated from the generate_answers() function.
def generate_question2(paragraph, qa_set):
    """Ask the model for a question whose answer is each given answer.

    Args:
        paragraph: Source text shown to the model.
        qa_set: Iterable of items whose third element is an
            ``(answer_text, score)`` tuple.

    Returns:
        A set of tuples made of the first three elements of each input item
        followed by the ``(question_text, score)`` generation result.
    """
    prompt_head = (
        "Please read the following paragraph and "
        "then generate a question whose answer is: "
    )
    return {
        (
            item[0],
            item[1],
            item[2],
            ask_flan_T5D(
                prompt_head
                + item[2][0]
                + "\nParagraph:\n"
                + paragraph
                + "\nQuestion:\n"
            ),
        )
        for item in qa_set
    }
|
||||
|
||||
|
||||
# Generate answers from a paragraph and a set of question strings.
# Input is the paragraph of text and a set of question texts taken from the
# results of the generate_question2() function.
def generate_answers2(paragraph, question_set):
    """Answer each question string using only the paragraph.

    Unlike generate_answers(), "NA" answers are not filtered out here.

    Returns:
        A set of ``(question_text, (answer_text, score))`` tuples.
    """
    prompt_head = (
        "Please read the following paragraph and "
        "then answer the question using only data "
        "found in the text. If no answer is possible, respond "
        "'NA'.\nText:\n" + paragraph + "\nQuestion:\n"
    )
    answered = set()
    for question_text in question_set:
        reply = ask_flan_T5D(prompt_head + question_text + "\nAnswer:\n")
        answered.add((question_text, reply))
    return answered
|
||||
|
||||
|
||||
# Generate declarative statement from question and answer pair.
def generate_declarative(qaq_set):
    """Rewrite each question/answer pair as a declarative statement.

    Args:
        qaq_set: Iterable of ``(question_text, (answer_text, score))`` tuples.

    Returns:
        A set of ``(question_text, answer_text, (statement_text, score))``
        tuples. Pairs whose answer is "NA" are skipped.
    """
    qaqd_results = set()
    for qa_item in qaq_set:
        question = qa_item[0]
        answer = qa_item[1][0]
        # Match "NA" as a whole word only, so real answers that merely
        # contain those letters (e.g. "NASA", "DNA") are not discarded.
        if re.search(r"\bNA\b", answer):
            continue
        input_text = (
            "Generate a declarative statement based on the "
            "given question and answer pair.\nQ: What is "
            "sitting on the couch?\nA: poodle\nA poodle is "
            "sitting on the couch.\nQ: "
            + question
            + "\nA: "
            + answer
            + "\n"
        )
        result = ask_flan_T5D(input_text)
        qaqd_results.add((question, answer, result))
    return qaqd_results
|
||||
|
||||
|
||||
# Generate closed book answer to question.
def generate_closed_answer(qaqd_set):
    """Answer each question without showing the model the paragraph.

    Args:
        qaqd_set: Iterable of items whose first element is the question text
            and whose third element is a ``(text, score)`` tuple.

    Returns:
        A set of tuples made of the first three elements of each input item
        followed by the ``(closed_book_answer_text, score)`` result. Items
        whose third-element text is "NA" are skipped.
    """
    qaqd_results = set()
    for qa_item in qaqd_set:
        question = qa_item[0]
        answer = qa_item[2][0]
        # Match "NA" as a whole word only, so text that merely contains
        # those letters (e.g. "NASA", "DNA") is not discarded.
        if re.search(r"\bNA\b", answer):
            continue
        input_text = (
            "Task: Answer the question in a detailed fashion. "
            "If the question cannot be answered without more "
            "information, please answer NA.\nExample 1:\nQuestion: "
            "Why does Shala like cookies?\nAnswer: It is not possible "
            "to know why Shala likes cookies without more information, "
            "but many people that like cookies enjoy their taste or "
            "some of their ingredients (e.g. chocolate chips or "
            "peanut butter).\nExample 2:\nQuestion: Why would someone "
            "vote in an election?\nAnswer: There are many reasons "
            "someone might vote in an election, for instance to have "
            "their voice heard or to help a candidate they like win the "
            "race.\nExample 3\nQuestion: What decoration goes on top of "
            "a Christmas tree?\nAnswer: Usually a star is placed at the "
            "top of a Christmas tree.\nExample 4:\nQuestion: "
            + question
            + "\nAnswer: "
        )
        result = ask_flan_T5D(input_text)
        qaqd_results.add((qa_item[0], qa_item[1], qa_item[2], result))
    return qaqd_results
|
||||
|
||||
|
||||
# Create a dictionary of questions and answers from a list of paragraphs.
# Takes about 20 seconds per paragraph to process.
start_time = time.perf_counter()
questions_dict = {}
uniq_id = 100000
# Only the first 1500 paragraphs are processed.
for paragraph in paragraphs[0:1500]:
    topic_list = generate_topic(paragraph)
    topic_prefix = generate_topic_prefix(topic_list)
    question_set = generate_questions(paragraph, 2)
    qa_set = generate_answers(paragraph, question_set)
    qaq_set = generate_question2(paragraph, qa_set)
    # Keep only the text of each regenerated question.
    q2_set = {q[3][0] for q in qaq_set}
    q2a2_set = generate_answers2(paragraph, q2_set)
    a2d_set = generate_declarative(q2a2_set)
    a3cb_set = generate_closed_answer(a2d_set)
    questions_dict[uniq_id] = {
        "topics": topic_list,
        "topic prepositions": topic_prefix,
        "paragraph": paragraph,
        # One numbered entry per question that survived every stage.
        "QA_set": {
            entry_count: {
                "question": entry[0],
                "answer_T5_ob": entry[2][0],
                "answer_T5_cb": entry[3][0],
            }
            for entry_count, entry in enumerate(a3cb_set)
        },
    }
    # Report progress with the id of the entry that was just stored.
    print(uniq_id, "topics:", topic_prefix)
    uniq_id += 1

stop_time = time.perf_counter()
generation_time = stop_time - start_time
# Show the last generated entry; skipped when no paragraph was processed.
if questions_dict:
    print(questions_dict[uniq_id - 1])
print(generation_time)
|
||||
|
||||
|
||||
# create a binary pickle file to save your dictionary
# The context manager closes the file even if pickling fails part-way.
with open("questions_dict.pkl", "wb") as f:
    pickle.dump(questions_dict, f)
|
||||
@@ -0,0 +1,5 @@
|
||||
# Data Augmentation
|
||||
|
||||
This folder contains subfolders of notebooks broadly relating to data
|
||||
augmentation. Each subfolder contains a README.md file explaining what the
|
||||
notebooks in that folder do.
|
||||
+4
-4
@@ -5,7 +5,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/EssayInstructions.ipynb)"
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/essay-instructions/essay-instructions.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +210,7 @@
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.8.10 64-bit",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -224,11 +224,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
+12
-4
@@ -5,16 +5,24 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/EssayRevision.ipynb)"
|
||||
"# Essay Revision"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/essay-revision/essay-revision.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "o0lAqmWhsiUe"
|
||||
},
|
||||
"source": [
|
||||
"#Essay Revision\n",
|
||||
"The goal of this notebook is to use data augmentation to generate data for improving essays. The way this is done is by taking a template \"good\" essay and making step-by-step changes that make it worse, adding instructions on how to fix it."
|
||||
]
|
||||
},
|
||||
@@ -319,11 +327,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "492d89208e1af30f4727fd53e254ea56e6b1a843b376782bfa5f6ce13d676265"
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
+18
-3
@@ -5,16 +5,24 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/StackExchangeBuilder.ipynb)"
|
||||
"# Ingest StackExchange data dumps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/stackexchange-builder/stackexchange-builder.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "TB7CEfs8F-8u"
|
||||
},
|
||||
"source": [
|
||||
"# Ingest StackExchange data dumps\n",
|
||||
"This notebook takes a StackExchange Data dump \"Posts.xml\" file and ingests it into a Pandas Dataframe. Outputs of the file can be JSON, JSONL, Parquet, or CSV. "
|
||||
]
|
||||
},
|
||||
@@ -1842,10 +1850,17 @@
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"name": "python",
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
+9
-3
@@ -9,11 +9,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "b2e3c95c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/data-argumentation/UnifiedQA.ipynb)"
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/unified-qa/unified-qa.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -493,7 +494,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -507,7 +508,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
+12
-5
@@ -5,7 +5,15 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/detoxify-evaluation/DetoxityEvaluation.ipynb)"
|
||||
"# Detoxify evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,7 +31,6 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Detoxify evaluation\n",
|
||||
"[Detoxify](https://github.com/unitaryai/detoxify) is an open-source model used to identify prompts as toxic\n",
|
||||
"\n",
|
||||
"<img src=\"https://raw.githubusercontent.com/unitaryai/detoxify/master/examples.png\" alt=\"Image from detoxify github that shows the example input/output of their model\" />\n",
|
||||
@@ -472,7 +479,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "DetoxifyEvaluation",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -486,12 +493,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "aeda4fe49bddd52f429be231bf767df53f2b167abae0a465a8ef142aa6b97b8a"
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -1,4 +1,4 @@
|
||||
# OpenBuggerNotebook
|
||||
# OpenBugger
|
||||
|
||||
https://github.com/furlat/OpenBugger/blob/main/README.md is a Python package
|
||||
that allows you to inject syntax and logic errors into your code. This can be
|
||||
+11
-3
@@ -5,7 +5,15 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/code-bugger/openbugger_example.ipynb)"
|
||||
"# OpenBugger Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/openbugger/openbugger_example.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -272,12 +280,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6 (tags/v3.10.6:9c7b4bd, Aug 1 2022, 21:53:49) [MSC v.1932 64 bit (AMD64)]"
|
||||
"version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "ceba285e8b4e6478fe8ad229bc63940a90ad5cf3d143521e7c38823a2e915b21"
|
||||
"hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4"
|
||||
}
|
||||
}
|
||||
},
|
||||
Reference in New Issue
Block a user