From 86cadc6e9fa2f3022f71c9de955333f27e9ff073 Mon Sep 17 00:00:00 2001 From: Andrew Maguire Date: Sat, 14 Jan 2023 22:51:09 +0000 Subject: [PATCH] Cleanup notebooks folder (#716) - standardize file naming to be a little more pythonic. - one folder per notebook plus a README for easy GH viewing. - adjust colab badges for new structure. --- .../README.md} | 0 .../T5_closed_book_QA_generator.md | 21 - .../T5_closed_book_QA_generator.py | 406 ------------------ .../T5_closed_book_QA_generators.py | 0 notebooks/data-augmentation/README.md | 5 + .../essay-instructions/README.md} | 0 .../essay-instructions.ipynb} | 8 +- .../essay-revision/README.md} | 0 .../essay-revision/essay-revision.ipynb} | 16 +- .../stackexchange-builder/README.md} | 0 .../stackexchange-builder.ipynb} | 21 +- .../unified-qa/README.md} | 0 .../unified-qa/unified-qa.ipynb} | 12 +- ...uation.ipynb => detoxify-evaluation.ipynb} | 17 +- .../README.md} | 2 +- .../openbugger_example.ipynb | 14 +- 16 files changed, 72 insertions(+), 450 deletions(-) rename notebooks/{knowledge_extraction/T5_closed_book_QA_generators.md => closed-book-qa/README.md} (100%) delete mode 100644 notebooks/closed-book-qa/T5_closed_book_QA_generator.md delete mode 100644 notebooks/closed-book-qa/T5_closed_book_QA_generator.py rename notebooks/{knowledge_extraction => closed-book-qa}/T5_closed_book_QA_generators.py (100%) create mode 100644 notebooks/data-augmentation/README.md rename notebooks/{data-argumentation/EssayInstructions.md => data-augmentation/essay-instructions/README.md} (100%) rename notebooks/{data-argumentation/EssayInstructions.ipynb => data-augmentation/essay-instructions/essay-instructions.ipynb} (97%) rename notebooks/{data-argumentation/EssayRevision.md => data-augmentation/essay-revision/README.md} (100%) rename notebooks/{data-argumentation/EssayRevision.ipynb => data-augmentation/essay-revision/essay-revision.ipynb} (97%) rename notebooks/{data-argumentation/StackExchangeBuilder.md => data-augmentation/stackexchange-builder/README.md} (100%) rename notebooks/{data-argumentation/StackExchangeBuilder.ipynb => data-augmentation/stackexchange-builder/stackexchange-builder.ipynb} (99%) rename notebooks/{data-argumentation/UnifiedQA.md => data-augmentation/unified-qa/README.md} (100%) rename notebooks/{data-argumentation/UnifiedQA.ipynb => data-augmentation/unified-qa/unified-qa.ipynb} (97%) rename notebooks/detoxify-evaluation/{DetoxityEvaluation.ipynb => detoxify-evaluation.ipynb} (99%) rename notebooks/{code-bugger/openbugger_example.md => openbugger/README.md} (99%) rename notebooks/{code-bugger => openbugger}/openbugger_example.ipynb (96%) diff --git a/notebooks/knowledge_extraction/T5_closed_book_QA_generators.md b/notebooks/closed-book-qa/README.md similarity index 100% rename from notebooks/knowledge_extraction/T5_closed_book_QA_generators.md rename to notebooks/closed-book-qa/README.md diff --git a/notebooks/closed-book-qa/T5_closed_book_QA_generator.md b/notebooks/closed-book-qa/T5_closed_book_QA_generator.md deleted file mode 100644 index 2cae860e..00000000 --- a/notebooks/closed-book-qa/T5_closed_book_QA_generator.md +++ /dev/null @@ -1,21 +0,0 @@ -# Generate Topics, Questions, and Answers from a text - -This python code can be used to generate topics, questions, and answers from a -paragraph of text. This is a good way to generate ground truth knowledge about a -topic from a trusted source. - -The output of this is a dictionary with: - -1. submitted paragraph -1. generated topics -1. generated questions -1. generated topic prefixes that can be prepended to the questions -1. open book answer based only on the provided paragraph -1. closed book answers generated by FLAN-T5-11B - -## Contributing - -This code is verified to work on a 24GB vram graphics card (like an RTX3090). We -are working on getting it to run on google colab TPUs and also it may be -possible to use smaller T5 models like the 3 billion parameter model and still -get acceptable results. diff --git a/notebooks/closed-book-qa/T5_closed_book_QA_generator.py b/notebooks/closed-book-qa/T5_closed_book_QA_generator.py deleted file mode 100644 index 68c40100..00000000 --- a/notebooks/closed-book-qa/T5_closed_book_QA_generator.py +++ /dev/null @@ -1,406 +0,0 @@ -# This notebook will run on a system with a single RTX3090 (24 GB vram). -# You need to install accelerate, bitsandbytes, and transformers - -import math -import pickle -import time - -import torch - -# load all needed libraries -from transformers import AutoModelForSeq2SeqLM, AutoTokenizer - -# This device map will work a GPU with > 24GB vram. -# It uses nearly all the memory. -device_map_T5_13B = { - "shared": 0, - "decoder.embed_tokens": 0, - "encoder.embed_tokens": 0, - "encoder.block.0": 0, - "encoder.block.1": 0, - "encoder.block.2": 0, - "encoder.block.3": 0, - "encoder.block.4": 0, - "encoder.block.5": 0, - "encoder.block.6": 0, - "encoder.block.7": 0, - "encoder.block.8": 0, - "encoder.block.9": 0, - "encoder.block.10": 0, - "encoder.block.11": 0, - "encoder.block.12": 0, - "encoder.block.13": 0, - "encoder.block.14": 0, - "encoder.block.15": 0, - "encoder.block.16": 0, - "encoder.block.17": 0, - "encoder.block.18": 0, - "encoder.block.19": 0, - "encoder.block.20": 0, - "encoder.block.21": 0, - "encoder.block.22": 0, - "encoder.block.23": 0, - "encoder.final_layer_norm": 0, - "encoder.dropout": 0, - "decoder.block.0": 0, - "decoder.block.1": 0, - "decoder.block.2": 0, - "decoder.block.3": 0, - "decoder.block.4": 0, - "decoder.block.5": 0, - "decoder.block.6": 0, - "decoder.block.7": 0, - "decoder.block.8": 0, - "decoder.block.9": 0, - "decoder.block.10": 0, - "decoder.block.11": 0, - "decoder.block.12": 0, - "decoder.block.13": 0, - "decoder.block.14": 0, - "decoder.block.15": 0, - "decoder.block.16": 0, - "decoder.block.17": 0, - "decoder.block.18": 0, - "decoder.block.19": 0, - "decoder.block.20": 0, - "decoder.block.21": 0, - "decoder.block.22": 0, - "decoder.block.23": 0, - "decoder.final_layer_norm": 0, - "decoder.dropout": 0, - "lm_head": 0, -} - - -# Load the model in bfloat16. Make sure to use bfloat16 -# if you are doing inference with 16bit precision. -tokenizer = AutoTokenizer.from_pretrained("flan-t5-xxl") -model = AutoModelForSeq2SeqLM.from_pretrained( - "flan-t5-xxl", - device_map=device_map_T5_13B, - torch_dtype=torch.bfloat16, - load_in_8bit=False, -) - - -# Load strings as knowledge sources for QA generation. -# You can do this with a pickle. -objects = [] -with (open("paragraphs.pkl", "rb")) as openfile: - while True: - try: - objects.append(pickle.load(openfile)) - except EOFError: - break -paragraphs = objects[0] - -# Make sure no paragraphs are too long for T5. -# It handles up to 512 tokens context length. -fixed_paragraphs = [] -for k in paragraphs: - if len(k) > 1100: - pass - else: - fixed_paragraphs.append(k) -print("Original number of paragraphs:", len(paragraphs)) -print("Length filtered number of paragraphs:", len(fixed_paragraphs)) -paragraphs = fixed_paragraphs - - -# Sort_Tuple sorts a list of tuples -# by the second element. -def Sort_Tuple(tup): - tup.sort(key=lambda x: x[1], reverse=True) - return tup - - -# ask_flan_T5 takes a text input and returns the -# response of FLAN_T5 and a normalized logits -# score for the generation. -def ask_flan_T5(input_text): - inputs = tokenizer.encode(input_text, return_tensors="pt").cuda(0) - outputs = model.generate( - inputs, - do_sample=True, - top_p=0.95, - eos_token_id=1, - max_new_tokens=50, - bos_token_id=0, - temperature=0.9, - return_dict_in_generate=True, - output_scores=True, - ) - out_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True) - probs = torch.stack(outputs.scores, dim=1).softmax(-1) - for i in outputs.sequences: - logprobs = 0 - counter = 0 - for k in i[1:]: - word_prob = (round(probs[0][counter][k.item()].item(), 2)) + 0.001 - logprobs = logprobs + math.log(word_prob) - counter += 1 - out_tuple = (out_text, round(logprobs, 2)) - return out_tuple - - -# ask_flan_T5D is a function that takes an input text and -# returns the deterministic(do_sample=False) output of -# FLAN_T5 and logits. -def ask_flan_T5D(input_text): - inputs = tokenizer.encode(input_text, return_tensors="pt").cuda(0) - outputs = model.generate( - inputs, - do_sample=False, - eos_token_id=1, - max_new_tokens=50, - bos_token_id=0, - return_dict_in_generate=True, - output_scores=True, - ) - out_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True) - probs = torch.stack(outputs.scores, dim=1).softmax(-1) - for i in outputs.sequences: - logprobs = 0 - counter = 0 - for k in i[1:]: - word_prob = (round(probs[0][counter][k.item()].item(), 2)) + 0.001 - logprobs = logprobs + math.log(word_prob) - counter += 1 - out_tuple = (out_text, round(logprobs, 2)) - return out_tuple - - -# Generate a topic classifier for a paragraph of text -def generate_topic(paragraph): - results = set() - input_text = ( - "Task: Create a topic classifier for the provided \ - paragraph.\nParagraph:\n" - + paragraph - + "\nTopic: " - ) - for k in range(0, 20): - result = ask_flan_T5(input_text) - if result[1] > -4: - results.add(result) - if len(results) < 3: - results.add(("I was wondering", -3.3)) - results.add(("I have a question", -3.3)) - sorted_results = Sort_Tuple(list(results)) - return sorted_results[0:5] - - -# Generate a topic classifier for a paragraph of text -def generate_topic_prefix(topic_set): - results = set() - for entry in topic_set: - topic = entry[0] - input_text = ( - "Task: Create a prepositional phrase about the topic.\n\ - Example 1\n Topic: climbing mount everest\nPrepositional \ - Phrase: With regards to climbing mount everest,\nExample \ - 2\nTopic: United States Air Force\nPrepositional Phrase: \ - On the topic of the United States Air Force,\n Example 3\nTopic: " - + topic - + "\nPrepositional Phrase: " - ) - for k in range(0, 5): - results.add(ask_flan_T5(input_text)) - sorted_results = Sort_Tuple(list(results)) - return sorted_results[0:5] - - -# Generate who/what/where/when/why questions from a paragraph. -# Number of questions variable is an integer which indicates how -# many of each question type to try to generate. -def generate_questions(paragraph, number_of_questions): - if len(tokenizer.encode(paragraph)) > 480: - print("Warning, the context length is too long.") - question_set = set() - question_types = [ - "What", - "Where", - "Why", - "How", - "Who", - ] - for qtype in question_types: - question = ( - "Please generate a question that starts with '" - + qtype - + "' based on the following paragraph.\nText:\n" - + paragraph - + "\nQuestion:\n" - ) - for k in range(0, number_of_questions): - new_question = ask_flan_T5(question) - if qtype in new_question[0]: - question_set.add((qtype, new_question)) - return question_set - - -# Generate answers for a set of questions. -# Input is the paragraph of text and a set of questions where each question -# is a tuple generated from the generate_questions() function. -def generate_answers(paragraph, question_set): - possible_answers = set() - for question in question_set: - input_text = ( - "Please read the following paragraph and \ - then answer the question using only data \ - found in the text. If no answer is possible, respond \ - 'NA'.\nText:\n" - + paragraph - + "\nQuestion:\n" - + question[1][0] - + "\nAnswer:\n" - ) - answer = ask_flan_T5D(input_text) - if "NA" in answer[0]: - pass - else: - possible_answers.add((question[0], question[1], answer)) - return possible_answers - - -# Generate questions from a paragraph and set of answers. -# Input is the paragraph of text and a set of answers where each question -# is a tuple generated from the generate_answers() function. -def generate_question2(paragraph, qa_set): - qaq_results = set() - for qa_item in qa_set: - answer = qa_item[2][0] - input_text = ( - "Please read the following paragraph and \ - then generate a question whose answer is: " - + answer - + "\nParagraph:\n" - + paragraph - + "\nQuestion:\n" - ) - result = ask_flan_T5D(input_text) - qaq_results.add((qa_item[0], qa_item[1], qa_item[2], result)) - return qaq_results - - -# Generate answers from a paragraph and set of questions. -# Input is the paragraph of text and a set of questions where each answer -# is a tuple generated from the generate_questions2() function. -def generate_answers2(paragraph, question_set): - possible_answers = set() - for question in question_set: - input_text = ( - "Please read the following paragraph and \ - then answer the question using only data \ - found in the text. If no answer is possible, respond \ - 'NA'.\nText:\n" - + paragraph - + "\nQuestion:\n" - + question - + "\nAnswer:\n" - ) - answer = ask_flan_T5D(input_text) - possible_answers.add((question, answer)) - return possible_answers - - -# Generate declarative statement from question and answer pair. -def generate_declarative(qaq_set): - qaqd_results = set() - for qa_item in qaq_set: - question = qa_item[0] - answer = qa_item[1][0] - if "NA" in answer: - pass - else: - input_text = ( - "Generate a declarative statement based on the \ - given question and answer pair.\nQ: What is \ - sitting on the couch?\nA: poodle\nA poodle is \ - sitting on the couch.\nQ: " - + question - + "\nA: " - + answer - + "\n" - ) - result = ask_flan_T5D(input_text) - qaqd_results.add((question, answer, result)) - return qaqd_results - - -# Generate closed book answer to question. -def generate_closed_answer(qaqd_set): - qaqd_results = set() - for qa_item in qaqd_set: - question = qa_item[0] - answer = qa_item[2][0] - if "NA" in answer: - # print(answer) - pass - else: - input_text = ( - "Task: Answer the question in a detailed fashion. \ - If the question cannot be answered without more \ - information, please answer NA.\nExample 1:\nQuestion: \ - Why does Shala like cookies?\nAnswer: It is not possible \ - to know why Shala likes cookies without more information, \ - but many people that like cookies enjoy their taste or \ - some of their ingredients (e.g. chocolate chips or \ - peanut butter).\nExample 2:\nQuestion: Why would someone \ - vote in an election?\nAnswer: There are many reasons \ - someone might vote in an election, for instance to have \ - their voice heard or to help a candidate they like win the \ - race.\nExample 3\nQuestion: What decoration goes on top of \ - a Christmas tree?\nAnswer: Usually a star is placed at the \ - top of a Christmas tree.\nExample 4:\nQuestion: " - + question - + "\nAnswer: " - ) - result = ask_flan_T5D(input_text) - qaqd_results.add((qa_item[0], qa_item[1], qa_item[2], result)) - return qaqd_results - - -# Create a dictionary of questions and answers from a list of paragraphs. -# Takes about 20 seconds per paragraph to process. -start_time = time.perf_counter() -questions_dict = {} -uniq_id = 100000 -for paragraph in paragraphs[0:1500]: - topic_list = generate_topic(paragraph) - topic_prefix = generate_topic_prefix(topic_list) - question_set = generate_questions(paragraph, 2) - qa_set = generate_answers(paragraph, question_set) - qaq_set = generate_question2(paragraph, qa_set) - q2_set = set() - for q in qaq_set: - q2_set.add(q[3][0]) - q2a2_set = generate_answers2(paragraph, q2_set) - a2d_set = generate_declarative(q2a2_set) - a3cb_set = generate_closed_answer(a2d_set) - questions_dict[uniq_id] = {} - questions_dict[uniq_id]["topics"] = topic_list - questions_dict[uniq_id]["topic prepositions"] = topic_prefix - questions_dict[uniq_id]["paragraph"] = paragraph - entry_count = 0 - entry_dict = {} - for entry in a3cb_set: - entry_dict[entry_count] = {} - entry_dict[entry_count]["question"] = entry[0] - entry_dict[entry_count]["answer_T5_ob"] = entry[2][0] - entry_dict[entry_count]["answer_T5_cb"] = entry[3][0] - entry_count += 1 - questions_dict[uniq_id]["QA_set"] = entry_dict - uniq_id += 1 - print(uniq_id, "topics:", topic_prefix) - -stop_time = time.perf_counter() -generation_time = stop_time - start_time -print(questions_dict[uniq_id - 1]) -print(generation_time) - - -# create a binary pickle file to save your dictionary -f = open("questions_dict.pkl", "wb") -pickle.dump(questions_dict, f) -f.close() diff --git a/notebooks/knowledge_extraction/T5_closed_book_QA_generators.py b/notebooks/closed-book-qa/T5_closed_book_QA_generators.py similarity index 100% rename from notebooks/knowledge_extraction/T5_closed_book_QA_generators.py rename to notebooks/closed-book-qa/T5_closed_book_QA_generators.py diff --git a/notebooks/data-augmentation/README.md b/notebooks/data-augmentation/README.md new file mode 100644 index 00000000..f6260009 --- /dev/null +++ b/notebooks/data-augmentation/README.md @@ -0,0 +1,5 @@ +# Data Augmentation + +This folder contains subfolders of notebooks broadly relating to data +augmentation. Each subfolder contains a README.md file explaining what the +notebooks in that folder do. diff --git a/notebooks/data-argumentation/EssayInstructions.md b/notebooks/data-augmentation/essay-instructions/README.md similarity index 100% rename from notebooks/data-argumentation/EssayInstructions.md rename to notebooks/data-augmentation/essay-instructions/README.md diff --git a/notebooks/data-argumentation/EssayInstructions.ipynb b/notebooks/data-augmentation/essay-instructions/essay-instructions.ipynb similarity index 97% rename from notebooks/data-argumentation/EssayInstructions.ipynb rename to notebooks/data-augmentation/essay-instructions/essay-instructions.ipynb index 30834d32..47b628aa 100644 --- a/notebooks/data-argumentation/EssayInstructions.ipynb +++ b/notebooks/data-augmentation/essay-instructions/essay-instructions.ipynb @@ -5,7 +5,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/EssayInstructions.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/essay-instructions/essay-instructions.ipynb)" ] }, { @@ -210,7 +210,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3.8.10 64-bit", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -224,11 +224,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" }, "vscode": { "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" } } }, diff --git a/notebooks/data-argumentation/EssayRevision.md b/notebooks/data-augmentation/essay-revision/README.md similarity index 100% rename from notebooks/data-argumentation/EssayRevision.md rename to notebooks/data-augmentation/essay-revision/README.md diff --git a/notebooks/data-argumentation/EssayRevision.ipynb b/notebooks/data-augmentation/essay-revision/essay-revision.ipynb similarity index 97% rename from notebooks/data-argumentation/EssayRevision.ipynb rename to notebooks/data-augmentation/essay-revision/essay-revision.ipynb index 2397131c..1f21fd73 100644 --- a/notebooks/data-argumentation/EssayRevision.ipynb +++ b/notebooks/data-augmentation/essay-revision/essay-revision.ipynb @@ -5,16 +5,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/EssayRevision.ipynb)" + "# Essay Revision" ] }, { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/essay-revision/essay-revision.ipynb)" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "o0lAqmWhsiUe" }, "source": [ - "#Essay Revision\n", "The goal of this notebook is to use data argumentation to have data on improving essays. The way this is done is by taking a template \"good\" essay and making step by step changes that make it worse and add intructions on how to fix it." ] }, @@ -319,11 +327,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" }, "vscode": { "interpreter": { - "hash": "492d89208e1af30f4727fd53e254ea56e6b1a843b376782bfa5f6ce13d676265" + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" } } }, diff --git a/notebooks/data-argumentation/StackExchangeBuilder.md b/notebooks/data-augmentation/stackexchange-builder/README.md similarity index 100% rename from notebooks/data-argumentation/StackExchangeBuilder.md rename to notebooks/data-augmentation/stackexchange-builder/README.md diff --git a/notebooks/data-argumentation/StackExchangeBuilder.ipynb b/notebooks/data-augmentation/stackexchange-builder/stackexchange-builder.ipynb similarity index 99% rename from notebooks/data-argumentation/StackExchangeBuilder.ipynb rename to notebooks/data-augmentation/stackexchange-builder/stackexchange-builder.ipynb index b0dd9a8b..165ac336 100644 --- a/notebooks/data-argumentation/StackExchangeBuilder.ipynb +++ b/notebooks/data-augmentation/stackexchange-builder/stackexchange-builder.ipynb @@ -5,16 +5,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-argumentation/StackExchangeBuilder.ipynb)" + "# Ingest StackExchange data dumps" ] }, { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/stackexchange-builder/stackexchange-builder.ipynb)" + ] + }, + { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "TB7CEfs8F-8u" }, "source": [ - "# Ingest StackExchange data dumps\n", "This notebook takes a StackExchange Data dump \"Posts.xml\" file and ingests it into a Pandas Dataframe. Outputs of the file can be JSON, JSONL, Parquet, or CSV. " ] }, @@ -1842,10 +1850,17 @@ }, "kernelspec": { "display_name": "Python 3", + "language": "python", "name": "python3" }, "language_info": { - "name": "python" + "name": "python", + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" + }, + "vscode": { + "interpreter": { + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" + } } }, "nbformat": 4, diff --git a/notebooks/data-argumentation/UnifiedQA.md b/notebooks/data-augmentation/unified-qa/README.md similarity index 100% rename from notebooks/data-argumentation/UnifiedQA.md rename to notebooks/data-augmentation/unified-qa/README.md diff --git a/notebooks/data-argumentation/UnifiedQA.ipynb b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb similarity index 97% rename from notebooks/data-argumentation/UnifiedQA.ipynb rename to notebooks/data-augmentation/unified-qa/unified-qa.ipynb index 4b93f596..37c85b3d 100644 --- a/notebooks/data-argumentation/UnifiedQA.ipynb +++ b/notebooks/data-augmentation/unified-qa/unified-qa.ipynb @@ -9,11 +9,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "b2e3c95c", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/data-argumentation/UnifiedQA.ipynb)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/data-augmentation/unified-qa/unified-qa.ipynb)" ] }, { @@ -493,7 +494,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -507,7 +508,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" + }, + "vscode": { + "interpreter": { + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" + } } }, "nbformat": 4, diff --git a/notebooks/detoxify-evaluation/DetoxityEvaluation.ipynb b/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb similarity index 99% rename from notebooks/detoxify-evaluation/DetoxityEvaluation.ipynb rename to notebooks/detoxify-evaluation/detoxify-evaluation.ipynb index 5ec0f019..ff2eca83 100644 --- a/notebooks/detoxify-evaluation/DetoxityEvaluation.ipynb +++ b/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb @@ -5,7 +5,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/detoxify-evaluation/DetoxityEvaluation.ipynb)" + "# Detoxify evaluation" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/detoxify-evaluation/detoxify-evaluation.ipynb)" ] }, { @@ -23,7 +31,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Detoxify evaluation\n", "[Detoxify](https://github.com/unitaryai/detoxify) is a open source model used to identify prompts as toxic\n", "\n", "\"Image\n", @@ -472,7 +479,7 @@ ], "metadata": { "kernelspec": { - "display_name": "DetoxifyEvaluation", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -486,12 +493,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "aeda4fe49bddd52f429be231bf767df53f2b167abae0a465a8ef142aa6b97b8a" + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" } } }, diff --git a/notebooks/code-bugger/openbugger_example.md b/notebooks/openbugger/README.md similarity index 99% rename from notebooks/code-bugger/openbugger_example.md rename to notebooks/openbugger/README.md index d8611dd7..428bdfec 100644 --- a/notebooks/code-bugger/openbugger_example.md +++ b/notebooks/openbugger/README.md @@ -1,4 +1,4 @@ -# OpenBuggerNotebook +# OpenBugger https://github.com/furlat/OpenBugger/blob/main/README.md is a Python package that allows you to inject syntax and logic errors into your code. This can be diff --git a/notebooks/code-bugger/openbugger_example.ipynb b/notebooks/openbugger/openbugger_example.ipynb similarity index 96% rename from notebooks/code-bugger/openbugger_example.ipynb rename to notebooks/openbugger/openbugger_example.ipynb index 6e2acd27..62085f8e 100644 --- a/notebooks/code-bugger/openbugger_example.ipynb +++ b/notebooks/openbugger/openbugger_example.ipynb @@ -5,7 +5,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/code-bugger/openbugger_example.ipynb)" + "# OpenBugger Example" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LAION-AI/Open-Assistant/blob/main/notebooks/openbugger/openbugger_example.ipynb)" ] }, { @@ -272,12 +280,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6 (tags/v3.10.6:9c7b4bd, Aug 1 2022, 21:53:49) [MSC v.1932 64 bit (AMD64)]" + "version": "3.7.4 (tags/v3.7.4:e09359112e, Jul 8 2019, 20:34:20) [MSC v.1916 64 bit (AMD64)]" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "ceba285e8b4e6478fe8ad229bc63940a90ad5cf3d143521e7c38823a2e915b21" + "hash": "25d5c2324055587ceaeef27650c79ce8358ea61d7689f2e0b8ada5d53f85bce4" } } },