From ad98a282410664bf03cbbaf1b835b440a26d0409 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Fri, 30 Dec 2022 17:25:50 +0000
Subject: [PATCH 01/53] [feature] add rank dataset for webgpt and human
 feedback summary

---
 model/reward/instructor/README.md             |   7 +
 model/reward/instructor/TODO.md               |  12 ++
 model/reward/instructor/cls_dataset.py        |  73 +++++++++
 .../reward/instructor/experimental_dataset.py |  11 ++
 model/reward/instructor/rank_datasets.py      | 145 ++++++++++++++++++
 model/reward/instructor/tests/__init__.py     |   0
 model/reward/instructor/tests/test_dataset.py |  28 ++++
 model/reward/instructor/trainer.py            |   2 +
 model/reward/instructor/utils.py              |  18 +++
 model/utils.py                                |   4 +
 10 files changed, 300 insertions(+)
 create mode 100644 model/reward/instructor/README.md
 create mode 100644 model/reward/instructor/TODO.md
 create mode 100644 model/reward/instructor/cls_dataset.py
 create mode 100644 model/reward/instructor/experimental_dataset.py
 create mode 100644 model/reward/instructor/rank_datasets.py
 create mode 100644 model/reward/instructor/tests/__init__.py
 create mode 100644 model/reward/instructor/tests/test_dataset.py
 create mode 100644 model/reward/instructor/trainer.py
 create mode 100644 model/reward/instructor/utils.py
 create mode 100644 model/utils.py

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
new file mode 100644
index 00000000..7dbfefbc
--- /dev/null
+++ b/model/reward/instructor/README.md
@@ -0,0 +1,7 @@
+
+
+
+```bash
+
+
+```
\ No newline at end of file
diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
new file mode 100644
index 00000000..33bc6595
--- /dev/null
+++ b/model/reward/instructor/TODO.md
@@ -0,0 +1,12 @@
+
+Some other reward features we can use
+
+
+Summaries from human feedback
+
+* use the `confidence` score in RM learning; ensure the output rank score correlates with confidence
+
+* each label has a `note` field (free-form comments by the labeler); not sure yet how else we can use it
+
+
+
diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py
new file mode 100644
index 00000000..54bbd19e
--- /dev/null
+++ b/model/reward/instructor/cls_dataset.py
@@ -0,0 +1,73 @@
+'''
+
+    classification based ranking
+
+'''
+import os
+import json
+import random
+import torch
+import numpy as np
+from dataset import load_dataset
+from torch.utils.data import Dataset
+from .utils import webgpt_return_format
+
+class WebGPTDataset(Dataset):
+    def __init__(self, mode='train', index_cache='dataset/webgpt_train_idx.pt', additional_dataset=None) -> None:
+        super().__init__()
+        '''
+            mode : 'train' or anything else (e.g. 'val'); selects our own 80/20 split, unrelated to the dataset's original split
+            additional_dataset : path to a JSON-lines file; each row has idx, question and negatives (generated candidates)
+                idx : must match the row's position when enumerating the comparison dataset
+                question : for validation purposes
+                negatives : list of K results generated from the question prompt
+        '''
+        os.makedirs('dataset', exist_ok=True)
+        dataset = load_dataset("openai/webgpt_comparisons")
+        if os.path.exists(index_cache):
+            train_idx = torch.load(index_cache)
+        else:
+            train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False)
+            torch.save(set(train_idx.tolist()), index_cache)
+        self.dataset = []
+        self.dataset_index = []
+        for idx, row in enumerate(dataset['train']):
+            if mode == 'train' and idx in train_idx:
+                self.dataset.append(webgpt_return_format(row))
+                self.dataset_index.append(idx)
+            elif idx not in train_idx and mode != 'train':
+                self.dataset.append(webgpt_return_format(row))
+                self.dataset_index.append(idx)
+
+        # this dataset was generated by GPT-3 (175B), so we also need
+        # some samples generated by the starting model: the reward model
+        # must rank GPT-3 answers above those of your starting model
+        self.sample_additional = False
+        if additional_dataset is not None:
+            self.sample_additional = True
+            self.additional = {}
+            with open(additional_dataset, 'r') as f:
+                for line in f:
+                    row = json.loads(line)
+                    if row['idx'] in self.dataset_index:
+                        self.additional[row['idx']] = row['negatives']
+            if len(self.additional) != len(self.dataset_index):
+                for match_idx in self.dataset_index:
+                    if match_idx in self.additional:
+                        continue
+
+                    idx = match_idx-900
+                    while idx not in self.additional:
+                        idx -= 1
+                    self.additional[match_idx] = self.additional[idx]
+
+    def __len__(self):
+        return len(self.dataset)
+
+    def __getitem__(self, index):
+        row = self.dataset[index]
+        if not self.sample_additional:
+            return row['question'], row['pos'], row['neg']
+
+        gen_neg = random.choice(self.additional[self.dataset_index[index]])
+        return row['question'], row['pos'], row['neg'], gen_neg
diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
new file mode 100644
index 00000000..145588c4
--- /dev/null
+++ b/model/reward/instructor/experimental_dataset.py
@@ -0,0 +1,11 @@
+'''
+    
+
+'''
+import os
+import json
+import random
+import torch
+import numpy as np
+from dataset import load_dataset
+from torch.utils.data import Dataset
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
new file mode 100644
index 00000000..7fef5ab7
--- /dev/null
+++ b/model/reward/instructor/rank_datasets.py
@@ -0,0 +1,145 @@
+'''
+    author: theblackcat102
+
+    A list of rank based dataset for training using rank loss
+
+    Some nice features to have
+
+    [ ] 
+
+'''
+import os
+import glob
+import json
+import numpy as np
+from torch.utils.data import Dataset
+from datasets import load_dataset
+
+class CollateFN():
+    def __init__(self, tokenizer, max_length=400) -> None:
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+
+    def __call__(self, batch):
+        prompts = []
+        pos_sentences = []
+        neg_sentences = []
+        for prompt, pairs in batch:
+            for (pos, neg) in pairs:
+                prompts.append(prompt)
+                pos_sentences.append(pos)
+                neg_sentences.append(neg)
+
+        batch = [self.tokenizer(prompts, pos_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True),\
+                self.tokenizer(prompts, neg_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True)]
+        return batch
+
+class WebGPT(Dataset):
+
+    def __init__(self) -> None:
+        super().__init__()
+
+        dataset = load_dataset("openai/webgpt_comparisons")
+        questions = {}
+        # using the prompt as our index allows us
+        # to add additional generated prompts later
+        self.index2question = {}
+        for row in dataset['train']:
+            question = row['question']['full_text']
+            if question not in self.index2question:
+                self.index2question[len(self.index2question)] = question
+
+            if question not in questions:
+                questions[question] = []
+
+            if row['score_0'] > row['score_1']:
+                # not going to risk it
+                questions[question].append((
+                    row['answer_0'], row['answer_1']
+                ))
+            else:
+                questions[question].append((
+                    row['answer_1'], row['answer_0']
+                ))
+
+        self.questions = questions
+
+    def __len__(self):
+        return len(self.index2question)
+
+    def __getitem__(self, index):
+        question = self.index2question[index]
+        rows = self.questions[question]
+        # optimize the format later
+        return question, rows
+
+
+
+
+class HFSummary(Dataset):
+    '''
+        Human feedback data from OpenAI
+        https://github.com/openai/summarize-from-feedback
+
+            >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive
+        
+        choice : 0 or 1
+
+    '''
+    def __init__(self, split='train',
+        path='summarize-from-feedback/comparisons/*.json',
+        conf_threshold=-1,
+        max_comparison_per_sample=5) -> None:
+        super().__init__()
+        assert split in ('train', 'valid1', 'valid2', 'test')
+        summaries = {}
+        # using the prompt as our index allows us
+        # to add additional generated prompts later
+        self.index2summary = {}
+        self.max_comparison_per_sample = max_comparison_per_sample
+        for jsonl_file in glob.glob(path):
+            with open(jsonl_file, 'r') as f:
+                for line in f:
+                    data = json.loads(line)
+                    if data['split'] != split:
+                        continue
+                    if 'extra' in data and \
+                        'confidence' in data['extra'] and \
+                        conf_threshold > data['extra']['confidence']:
+                        print('skipping {}'.format(data['info']['id']))
+                        continue
+
+                    if 'article' in data['info']:
+                        context = data['info']['article']
+                    elif 'post' in data['info']:
+                        context = data['info']['post']
+
+                    if context not in self.index2summary:
+                        self.index2summary[len(self.index2summary)] = context
+                    
+                    if context not in summaries:
+                        summaries[context] = []
+
+                    pos, neg = (0, 1) if data['choice'] == 0 else (1, 0)
+                    summaries[context].append((
+                        data['summaries'][pos]['text'],
+                        data['summaries'][neg]['text']
+                    ))
+
+        self.summaries = summaries
+
+    def __len__(self):
+        return len(self.index2summary)
+
+    def __getitem__(self, index):
+        context = self.index2summary[index]
+        # return pairs of comparison
+        rows = self.summaries[context]
+        # pair very big
+        # we are going to do some sampling
+        # not optimal but good for now
+        valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample)
+        # optimize the format later
+        return context, [ r for idx, r in enumerate(rows) if idx in valid_idx ]
+
+
diff --git a/model/reward/instructor/tests/__init__.py b/model/reward/instructor/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
new file mode 100644
index 00000000..4dd59c16
--- /dev/null
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -0,0 +1,28 @@
+from transformers import AutoTokenizer
+from torch.utils.data import DataLoader
+from rank_datasets import WebGPT, HFSummary, CollateFN
+
+
+def test_hfsummary():
+    
+    tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
+    collate_fn = CollateFN(tokenizer)
+    dataset = HFSummary()
+    dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
+    for batch in dataloader:
+        print(batch[0]['input_ids'].shape)
+ 
+
+def test_webgpt():
+    
+    tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
+    collate_fn = CollateFN(tokenizer)
+    dataset = WebGPT()
+    dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
+    for batch in dataloader:
+        print(batch[0]['input_ids'].shape)
+
+
+if __name__ == "__main__":
+    test_hfsummary()
+    # test_webgpt()
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
new file mode 100644
index 00000000..9ee5e043
--- /dev/null
+++ b/model/reward/instructor/trainer.py
@@ -0,0 +1,2 @@
+import wandb
+from accelerate import Accelerator
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
new file mode 100644
index 00000000..1487947c
--- /dev/null
+++ b/model/reward/instructor/utils.py
@@ -0,0 +1,18 @@
+import re
+
+re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]')
+
+def webgpt_return_format(row):
+    if row['score_0'] >= row['score_1']:
+        # remove this to prevent information leak, since we are not using reference
+        return {
+                'question': row['question']['full_text'],
+                     'pos': re_reference_remove.sub('', row['answer_0']),
+                     'neg': re_reference_remove.sub('', row['answer_1'])
+                }
+
+    return {
+            'question': row['question']['full_text'],
+                 'pos': re_reference_remove.sub('', row['answer_1']),
+                 'neg': re_reference_remove.sub('', row['answer_0'])
+            }
diff --git a/model/utils.py b/model/utils.py
new file mode 100644
index 00000000..579b3f6e
--- /dev/null
+++ b/model/utils.py
@@ -0,0 +1,4 @@
+from transformers import AutoTokenizer
+
+
+def update_galactica_tokenizer():
\ No newline at end of file

From bcd5c52b3b370a217042b2ccb1983e113ecf6193 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 03:02:10 +0000
Subject: [PATCH 02/53] [feature] working trainer code

---
 .vscode/settings.json                         |   2 +-
 .../reward/instructor/experimental_dataset.py |  10 +-
 model/reward/instructor/rank_datasets.py      |  49 ++++++---
 model/reward/instructor/tests/test_dataset.py |  10 +-
 model/reward/instructor/trainer.py            | 104 +++++++++++++++++-
 model/reward/instructor/utils.py              |  23 ++++
 6 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 56a51f78..4c58a32f 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,4 @@
 {
-  "python.formatting.provider": "black",
+  "python.formatting.provider": "autopep8",
   "python.analysis.extraPaths": ["${workspaceFolder}/oasst-shared"]
 }
diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index 145588c4..f705ccf6 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -1,5 +1,11 @@
 '''
-    
+    HFSummary
+
+        I want to train a multi-output regression model on the axis_evals dataset, so that we can estimate each of these scores
+
+         - {"overall": "6", "accuracy": "6", "coverage": "6", "coherence": "7"}
+
+        Should be better than just a preference score
 
 '''
 import os
@@ -9,3 +15,5 @@ import torch
 import numpy as np
 from dataset import load_dataset
 from torch.utils.data import Dataset
+
+
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 7fef5ab7..e407b30f 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -8,32 +8,51 @@
     [ ] 
 
 '''
+from typing import Optional, Union
 import os
 import glob
 import json
+from dataclasses import dataclass
 import numpy as np
 from torch.utils.data import Dataset
+import torch
 from datasets import load_dataset
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
 
-class CollateFN():
-    def __init__(self, tokenizer, max_length=400) -> None:
-        self.tokenizer = tokenizer
-        self.max_length = max_length
+@dataclass
+class DataCollatorForPairRank:
+    """
 
-    def __call__(self, batch):
-        prompts = []
-        pos_sentences = []
-        neg_sentences = []
-        for prompt, pairs in batch:
+    Data collator that tokenizes each (question, answer) pair and dynamically pads the results into one batch.
+
+    """
+    tokenizer: PreTrainedTokenizerBase
+    num_choices: int = 2
+    padding: Union[bool, str, PaddingStrategy] = True
+    max_length: Optional[int] = None
+    pad_to_multiple_of: Optional[int] = None
+
+    def __call__(self, features):
+
+        flatten_features = []
+        batch_size = 0
+        for question, pairs in features:
             for (pos, neg) in pairs:
-                prompts.append(prompt)
-                pos_sentences.append(pos)
-                neg_sentences.append(neg)
-
-        batch = [self.tokenizer(prompts, pos_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True),\
-                self.tokenizer(prompts, neg_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True)]
+                flatten_features.append(self.tokenizer(question, pos, truncation=True))
+                flatten_features.append(self.tokenizer(question, neg, truncation=True))
+                batch_size += 1
+        
+        batch = self.tokenizer.pad(
+            flatten_features,
+            padding=self.padding,
+            max_length=self.max_length,
+            pad_to_multiple_of=self.pad_to_multiple_of,
+            return_tensors="pt",
+        )
+        # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
         return batch
 
+
 class WebGPT(Dataset):
 
     def __init__(self) -> None:
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index 4dd59c16..c452786b 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -1,26 +1,26 @@
 from transformers import AutoTokenizer
 from torch.utils.data import DataLoader
-from rank_datasets import WebGPT, HFSummary, CollateFN
+from rank_datasets import WebGPT, HFSummary, DataCollatorForMultipleChoice
 
 
 def test_hfsummary():
     
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
-    collate_fn = CollateFN(tokenizer)
+    collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200)
     dataset = HFSummary()
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
     for batch in dataloader:
-        print(batch[0]['input_ids'].shape)
+        print(batch['input_ids'].shape)
  
 
 def test_webgpt():
     
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
-    collate_fn = CollateFN(tokenizer)
+    collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200)
     dataset = WebGPT()
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:
-        print(batch[0]['input_ids'].shape)
+        print(batch['input_ids'].shape)
 
 
 if __name__ == "__main__":
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 9ee5e043..43a5f8ef 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -1,2 +1,102 @@
-import wandb
-from accelerate import Accelerator
+from typing import Callable, List, Optional, Tuple, Union, Dict
+import torch
+from torch import nn
+import numpy as np
+import evaluate
+from dataclasses import dataclass
+from torch.utils.data import Dataset
+from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice
+from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
+from rank_datasets import DataCollatorForPairRank, WebGPT
+from utils import get_tokenizer, train_val_dataset
+
+accuracy = evaluate.load("accuracy")
+
+@dataclass
+class CustomTrainingArguments(TrainingArguments):
+    loss_function: str='rank'
+
+
+def compute_metrics(eval_pred):
+    predictions, _ = eval_pred
+    predictions = np.argmax(predictions, axis=1)
+    return accuracy.compute(predictions=predictions, references=[0]*predictions.shape[0])
+
+class RankLoss(nn.Module):
+    def __init__(self, eps=1e-8) -> None:
+        super().__init__()
+        self.eps = eps
+        self.log_sigmoid = nn.LogSigmoid()
+
+    def forward(self, pos, neg):
+        return -self.log_sigmoid(pos - neg + self.eps).mean()
+
+
+class RankTrainer(Trainer):
+    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
+                 args: TrainingArguments = None,
+                 data_collator: Optional[DataCollator] = None,
+                 train_dataset: Optional[Dataset] = None,
+                 eval_dataset: Optional[Dataset] = None,
+                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
+                 model_init: Callable[[], PreTrainedModel] = None,
+                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+                 callbacks: Optional[List[TrainerCallback]] = None,
+                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
+        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
+                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
+        self.loss_fct = RankLoss() if args.loss_function == 'rank' else nn.CrossEntropyLoss()
+        self.loss_function = args.loss_function
+
+    def compute_loss(self, model, inputs, return_outputs=False):
+        # forward pass
+        outputs = model(**inputs)
+        logits = outputs.get("logits").view(-1, 2)
+        if self.loss_function == 'rank':
+            loss = self.loss_fct(logits[:, 0], logits[:, 1])
+        else:
+            loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
+
+        return (loss, outputs) if return_outputs else loss
+
+
+if __name__ == "__main__":
+    model_name = 'bigscience/bloomz-560m'
+    model_name = 'google/electra-base-discriminator'
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
+    tokenizer = get_tokenizer(model_name)
+    args = CustomTrainingArguments(
+        output_dir=f"outputs/{model_name}-finetuned",
+        fp16=True,
+        num_train_epochs=4,
+        warmup_steps=500,
+        learning_rate=3e-5,
+        # half_precision_backend="apex",
+        gradient_checkpointing=False,
+        gradient_accumulation_steps=6,
+        per_device_train_batch_size=12,
+        per_device_eval_batch_size=5,
+        weight_decay=0.01,
+        max_grad_norm=2.0,
+        logging_steps=10,
+        save_total_limit=4,
+        evaluation_strategy='steps',
+        loss_function='rank',
+        eval_steps=500,
+        save_steps=1000,
+        report_to="wandb",
+        run_name='reward-model'
+    )
+    dataset = WebGPT()
+    train, eval = train_val_dataset(dataset)
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=400)
+    trainer = RankTrainer(
+        model,
+        args,
+        train_dataset=train,
+        eval_dataset=eval,
+        data_collator=collate_fn,
+        tokenizer=tokenizer
+    )
+    trainer.train()
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index 1487947c..10f84193 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -1,4 +1,7 @@
 import re
+from torch.utils.data import Subset
+from sklearn.model_selection import train_test_split
+from transformers import AutoTokenizer
 
 re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]')
 
@@ -16,3 +19,23 @@ def webgpt_return_format(row):
                  'pos': re_reference_remove.sub('', row['answer_1']),
                  'neg': re_reference_remove.sub('', row['answer_0'])
             }
+
+
+def get_tokenizer(tokenizer_name):
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+    if 'galactica' in tokenizer_name:
+        tokenizer.add_special_tokens({'pad_token':'<pad>', 'eos_token': '</s>' })
+
+    return tokenizer
+
+
+
+def train_val_dataset(dataset, val_split=0.2):
+    train_idx, val_idx = train_test_split(list(range(len(dataset))), 
+        test_size=val_split, random_state=666, shuffle=True)
+    # [3879, 11479, 8341, 9177, 10798, 18177, 5735, 15669, 4837, 2760]
+    print(val_idx[:10])
+    # [13582, 5919, 11875, 7373, 19135, 13706, 8555, 15788, 15005, 15209]
+    print(train_idx[:10])
+    return Subset(dataset, train_idx), Subset(dataset, val_idx)
+

From b2ef4695a0e0b72ff9e3d4c14ae85b9c35ec24da Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 03:47:54 +0000
Subject: [PATCH 03/53] [fix] Fix missing accuracy and eval loss

---
 model/reward/instructor/trainer.py | 43 +++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 43a5f8ef..45ee76c6 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -1,4 +1,6 @@
-from typing import Callable, List, Optional, Tuple, Union, Dict
+import os
+os.environ['WANDB_PROJECT'] = 'reward-model'
+from typing import Any, Callable, List, Optional, Tuple, Union, Dict
 import torch
 from torch import nn
 import numpy as np
@@ -60,6 +62,29 @@ class RankTrainer(Trainer):
 
         return (loss, outputs) if return_outputs else loss
 
+    def _compute_loss(self, model, inputs):
+        inputs = self._prepare_inputs(inputs)
+        outputs = model(**inputs)
+        logits = outputs.get("logits").view(-1, 2)
+        if self.loss_function == 'rank':
+            loss = self.loss_fct(logits[:, 0], logits[:, 1])
+        else:
+            loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
+
+        return loss, logits
+
+    def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+
+        with torch.no_grad():
+            # compute loss on predict data
+            loss, logits = self._compute_loss(model, inputs)
+
+        loss = loss.mean().detach()
+        labels = torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long)
+        if self.args.prediction_loss_only:
+            return (loss, None, None)
+
+        return (loss, logits, labels)
 
 if __name__ == "__main__":
     model_name = 'bigscience/bloomz-560m'
@@ -67,26 +92,25 @@ if __name__ == "__main__":
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
-        output_dir=f"outputs/{model_name}-finetuned",
-        fp16=True,
+        output_dir=f"{model_name}-finetuned",
         num_train_epochs=4,
         warmup_steps=500,
+        loss_function='rank',
         learning_rate=3e-5,
         # half_precision_backend="apex",
+        fp16=True,
         gradient_checkpointing=False,
-        gradient_accumulation_steps=6,
-        per_device_train_batch_size=12,
+        gradient_accumulation_steps=5,
+        per_device_train_batch_size=16,
         per_device_eval_batch_size=5,
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
         evaluation_strategy='steps',
-        loss_function='rank',
         eval_steps=500,
         save_steps=1000,
-        report_to="wandb",
-        run_name='reward-model'
+        report_to='wandb'
     )
     dataset = WebGPT()
     train, eval = train_val_dataset(dataset)
@@ -97,6 +121,7 @@ if __name__ == "__main__":
         train_dataset=train,
         eval_dataset=eval,
         data_collator=collate_fn,
-        tokenizer=tokenizer
+        tokenizer=tokenizer,
+        compute_metrics=compute_metrics
     )
     trainer.train()

From 3a10f1024ab16a00acb42b400ac5195a0aec07b5 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 09:27:09 +0000
Subject: [PATCH 04/53] [fix] Fix truncation in collate fn

---
 model/reward/instructor/rank_datasets.py | 11 +++++++----
 model/reward/instructor/trainer.py       | 15 ++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index e407b30f..128baafe 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -38,8 +38,10 @@ class DataCollatorForPairRank:
         batch_size = 0
         for question, pairs in features:
             for (pos, neg) in pairs:
-                flatten_features.append(self.tokenizer(question, pos, truncation=True))
-                flatten_features.append(self.tokenizer(question, neg, truncation=True))
+                flatten_features.append(self.tokenizer(question, pos,
+                    truncation=True, max_length=self.max_length))
+                flatten_features.append(self.tokenizer(question, neg,
+                    truncation=True, max_length=self.max_length))
                 batch_size += 1
         
         batch = self.tokenizer.pad(
@@ -147,6 +149,8 @@ class HFSummary(Dataset):
 
         self.summaries = summaries
 
+        self.postfix_prompt = ' TLDR;'
+
     def __len__(self):
         return len(self.index2summary)
 
@@ -159,6 +163,5 @@ class HFSummary(Dataset):
         # not optimal but good for now
         valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample)
         # optimize the format later
-        return context, [ r for idx, r in enumerate(rows) if idx in valid_idx ]
-
+        return context+self.postfix_prompt, [ r for idx, r in enumerate(rows) if idx in valid_idx ]
 
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 45ee76c6..586c8d47 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -6,10 +6,10 @@ from torch import nn
 import numpy as np
 import evaluate
 from dataclasses import dataclass
-from torch.utils.data import Dataset
+from torch.utils.data import Dataset, ConcatDataset
 from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice
 from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
-from rank_datasets import DataCollatorForPairRank, WebGPT
+from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary
 from utils import get_tokenizer, train_val_dataset
 
 accuracy = evaluate.load("accuracy")
@@ -88,7 +88,7 @@ class RankTrainer(Trainer):
 
 if __name__ == "__main__":
     model_name = 'bigscience/bloomz-560m'
-    model_name = 'google/electra-base-discriminator'
+    model_name = 'google/electra-large-discriminator'
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
@@ -99,9 +99,9 @@ if __name__ == "__main__":
         learning_rate=3e-5,
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=False,
-        gradient_accumulation_steps=5,
-        per_device_train_batch_size=16,
+        gradient_checkpointing=True,
+        gradient_accumulation_steps=8,
+        per_device_train_batch_size=8,
         per_device_eval_batch_size=5,
         weight_decay=0.01,
         max_grad_norm=2.0,
@@ -114,7 +114,8 @@ if __name__ == "__main__":
     )
     dataset = WebGPT()
     train, eval = train_val_dataset(dataset)
-    collate_fn = DataCollatorForPairRank(tokenizer, max_length=400)
+    train = ConcatDataset([train, HFSummary()])
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=440)
     trainer = RankTrainer(
         model,
         args,

From d2572d032301cff6c4304fd54952d2f49fe1eecd Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 09:42:49 +0000
Subject: [PATCH 05/53] [fix] Add drop_token_type to use galactica

---
 model/reward/instructor/rank_datasets.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 128baafe..41740dcf 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -31,6 +31,7 @@ class DataCollatorForPairRank:
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
+    drop_token_type: bool = False
 
     def __call__(self, features):
 
@@ -51,6 +52,8 @@ class DataCollatorForPairRank:
             pad_to_multiple_of=self.pad_to_multiple_of,
             return_tensors="pt",
         )
+        if self.drop_token_type:
+            batch.pop('token_type_ids')
         # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
         return batch
 

From f3c299757d89fc6913996d852e3e8563ae61b5cf Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 17:02:46 +0000
Subject: [PATCH 06/53] [feature] added configs argument for parameters
 training and recording

---
 model/reward/instructor/README.md        |  3 ++
 model/reward/instructor/rank_datasets.py |  2 -
 model/reward/instructor/trainer.py       | 52 ++++++++++++++++--------
 model/reward/instructor/utils.py         | 38 +++++++++++++++++
 4 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 7dbfefbc..a8b5ef33 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -1,5 +1,8 @@
+# Sections to train Reward Model (RM)
 
 
+Currently we format 
+
 
 ```bash
 
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 41740dcf..aa77089c 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -9,13 +9,11 @@
 
 '''
 from typing import Optional, Union
-import os
 import glob
 import json
 from dataclasses import dataclass
 import numpy as np
 from torch.utils.data import Dataset
-import torch
 from datasets import load_dataset
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
 
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 586c8d47..06bb8098 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -1,18 +1,22 @@
 import os
 os.environ['WANDB_PROJECT'] = 'reward-model'
-from typing import Any, Callable, List, Optional, Tuple, Union, Dict
 import torch
-from torch import nn
-import numpy as np
+import yaml
 import evaluate
+from typing import Any, Callable, List, Optional, Tuple, Union, Dict
+from torch import nn
+from argparse import ArgumentParser
+import numpy as np
 from dataclasses import dataclass
 from torch.utils.data import Dataset, ConcatDataset
-from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice
+from transformers import AutoModelForSequenceClassification
 from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
 from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary
-from utils import get_tokenizer, train_val_dataset
+from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
 
 accuracy = evaluate.load("accuracy")
+parser = ArgumentParser()
+parser.add_argument('config', type=str)
 
 @dataclass
 class CustomTrainingArguments(TrainingArguments):
@@ -87,21 +91,26 @@ class RankTrainer(Trainer):
         return (loss, logits, labels)
 
 if __name__ == "__main__":
-    model_name = 'bigscience/bloomz-560m'
-    model_name = 'google/electra-large-discriminator'
+    training_conf = argument_parsing(parser)
+    
+    model_name = training_conf['model_name']
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
+    if 'freeze_layer' in training_conf:
+        num_layer = training_conf['freeze_layer']
+        model = freeze_top_n_layers(model, num_layer)
+
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
         output_dir=f"{model_name}-finetuned",
-        num_train_epochs=4,
+        num_train_epochs=training_conf['num_train_epochs'],
         warmup_steps=500,
-        loss_function='rank',
-        learning_rate=3e-5,
+        loss_function=training_conf['loss'],
+        learning_rate=training_conf['learning_rate'],
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=True,
-        gradient_accumulation_steps=8,
-        per_device_train_batch_size=8,
+        gradient_checkpointing=training_conf['gradient_checkpointing'],
+        gradient_accumulation_steps=training_conf['gradient_checkpointing'],
+        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
         per_device_eval_batch_size=5,
         weight_decay=0.01,
         max_grad_norm=2.0,
@@ -112,10 +121,19 @@ if __name__ == "__main__":
         save_steps=1000,
         report_to='wandb'
     )
-    dataset = WebGPT()
-    train, eval = train_val_dataset(dataset)
-    train = ConcatDataset([train, HFSummary()])
-    collate_fn = DataCollatorForPairRank(tokenizer, max_length=440)
+    train_datasets, evals = [], {}
+    if 'webgpt' in training_conf['datasets']:
+        web_dataset = WebGPT()
+        train, eval = train_val_dataset(web_dataset)
+        train_datasets.append(train)
+        evals['webgpt'] = eval
+    if 'hfsummary' in training_conf['datasets']:
+        summary_dataset = HFSummary()
+        sum_train, sum_eval = train_val_dataset(summary_dataset)
+        train_datasets.append(sum_train)
+        evals['hfsummary'] = sum_eval
+    
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'])
     trainer = RankTrainer(
         model,
         args,
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index 10f84193..4867087c 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -1,4 +1,5 @@
 import re
+import yaml
 from torch.utils.data import Subset
 from sklearn.model_selection import train_test_split
 from transformers import AutoTokenizer
@@ -39,3 +40,40 @@ def train_val_dataset(dataset, val_split=0.2):
     print(train_idx[:10])
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
+def freeze_top_n_layers(model, target_layers):
+    for name, param in model.name_parameters():
+        if 'embed' in name:
+            param.requires_grad = False
+        elif 'layer' in name:
+            tokens = name.split('.')
+            idx = 0
+            for token in tokens:
+                if 'layer' in token:
+                    break
+                idx += 1
+
+            layer_ = int(tokens[idx+1])
+            if layer_ < target_layers:
+                param.requires_grad = False
+    return model
+
+
+def argument_parsing(parser):
+    default_params = {
+        'num_train_epochs': 4,
+        'learning_rate': 3e-5,
+        'eval_steps': 500,
+        'loss': 'rank',
+        'max_length': 440,
+        'per_device_train_batch_size': 8,
+        'gradient_accumulation_steps': 8,
+        'gradient_checkpointing': False,
+        'datasets': ['webgpt']
+    }
+    args = parser.parse_args()
+    with open(args.config, 'r', encoding='utf-8') as f:
+        training_conf = yaml.safe_load(f.read())
+
+    return { **default_params, **training_conf }
+
+

From 24e06626f46e1f9a4bd4f112ac8c8af45556e866 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 17:04:44 +0000
Subject: [PATCH 07/53] [fix] Fix missing configs

---
 model/reward/instructor/configs/electra-base-dis-webgpt.yml | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 model/reward/instructor/configs/electra-base-dis-webgpt.yml

diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
new file mode 100644
index 00000000..5c02fab7
--- /dev/null
+++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
@@ -0,0 +1,2 @@
+model_name: google/electra-base-discriminator
+learning_rate: 3e-5

From 918b7b7ec0446651cb724ee0909288d6a89ce71b Mon Sep 17 00:00:00 2001
From: theblackcat102 <ray.tam@ikala.tv>
Date: Sun, 1 Jan 2023 01:25:53 +0800
Subject: [PATCH 08/53] [feature] Add galactica training config

---
 model/reward/instructor/configs/galactica-125m.yml | 13 +++++++++++++
 model/reward/instructor/configs/galactica-1b.yml   |  8 ++++++++
 model/reward/instructor/trainer.py                 |  6 +++---
 model/reward/instructor/utils.py                   |  8 ++++++--
 4 files changed, 30 insertions(+), 5 deletions(-)
 create mode 100644 model/reward/instructor/configs/galactica-125m.yml
 create mode 100644 model/reward/instructor/configs/galactica-1b.yml

diff --git a/model/reward/instructor/configs/galactica-125m.yml b/model/reward/instructor/configs/galactica-125m.yml
new file mode 100644
index 00000000..55e093f5
--- /dev/null
+++ b/model/reward/instructor/configs/galactica-125m.yml
@@ -0,0 +1,13 @@
+model_name: facebook/galactica-125m
+learning_rate: 1e-5
+gradient_checkpointing: false
+gradient_accumulation_steps: 32
+per_device_train_batch_size: 2
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
+max_length: 512
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml
new file mode 100644
index 00000000..48ad439b
--- /dev/null
+++ b/model/reward/instructor/configs/galactica-1b.yml
@@ -0,0 +1,8 @@
+model_name: facebook/galactica-1.3b
+learning_rate: 6e-6
+gradient_checkpointing: false
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 4
+warmup_steps: 600
+eval_steps: 200
+save_steps: 500
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 06bb8098..dbdd91ba 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -109,7 +109,7 @@ if __name__ == "__main__":
         # half_precision_backend="apex",
         fp16=True,
         gradient_checkpointing=training_conf['gradient_checkpointing'],
-        gradient_accumulation_steps=training_conf['gradient_checkpointing'],
+        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
         per_device_train_batch_size=training_conf['per_device_train_batch_size'],
         per_device_eval_batch_size=5,
         weight_decay=0.01,
@@ -132,8 +132,8 @@ if __name__ == "__main__":
         sum_train, sum_eval = train_val_dataset(summary_dataset)
         train_datasets.append(sum_train)
         evals['hfsummary'] = sum_eval
-    
-    collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'])
+    train = ConcatDataset(train_datasets)
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name)
     trainer = RankTrainer(
         model,
         args,
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index 4867087c..733e6ea7 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -74,6 +74,10 @@ def argument_parsing(parser):
     with open(args.config, 'r', encoding='utf-8') as f:
         training_conf = yaml.safe_load(f.read())
 
-    return { **default_params, **training_conf }
-
+    params = { **default_params, **training_conf }
+    params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])
+    params['num_train_epochs'] = int(params['num_train_epochs'])
+    params['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])
+    params['learning_rate'] = float(params['learning_rate'])
+    return params
 

From ba336fb087d10892b47133fdbee49846e6759db4 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 17:43:27 +0000
Subject: [PATCH 09/53] [fix] fix freeze top N layers

---
 model/reward/instructor/configs/galactica-1b.yml | 10 ++++++++--
 model/reward/instructor/trainer.py               |  3 +++
 model/reward/instructor/utils.py                 |  7 ++++---
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml
index 48ad439b..5a094520 100644
--- a/model/reward/instructor/configs/galactica-1b.yml
+++ b/model/reward/instructor/configs/galactica-1b.yml
@@ -2,7 +2,13 @@ model_name: facebook/galactica-1.3b
 learning_rate: 6e-6
 gradient_checkpointing: false
 gradient_accumulation_steps: 16
-per_device_train_batch_size: 4
+per_device_train_batch_size: 2
 warmup_steps: 600
+freeze_layer: 20
 eval_steps: 200
-save_steps: 500
\ No newline at end of file
+save_steps: 500
+max_length: 400
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index dbdd91ba..22baf130 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -98,6 +98,9 @@ if __name__ == "__main__":
     if 'freeze_layer' in training_conf:
         num_layer = training_conf['freeze_layer']
         model = freeze_top_n_layers(model, num_layer)
+        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
+        params = sum([np.prod(p.size()) for p in model_parameters])
+        print('Number of trainable : {}M'.format(int(params/1e6)))
 
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index 733e6ea7..ef3ed98d 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -41,23 +41,24 @@ def train_val_dataset(dataset, val_split=0.2):
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
 def freeze_top_n_layers(model, target_layers):
-    for name, param in model.name_parameters():
+    for name, param in model.named_parameters():
         if 'embed' in name:
             param.requires_grad = False
-        elif 'layer' in name:
+        elif '.layer' in name:
             tokens = name.split('.')
             idx = 0
             for token in tokens:
                 if 'layer' in token:
                     break
                 idx += 1
+            if idx >= len(tokens):
+                continue
 
             layer_ = int(tokens[idx+1])
             if layer_ < target_layers:
                 param.requires_grad = False
     return model
 
-
 def argument_parsing(parser):
     default_params = {
         'num_train_epochs': 4,

From c5b31d0b9e268cebd7b1f3ab8a5327541d8e6dd2 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sat, 31 Dec 2022 18:20:41 +0000
Subject: [PATCH 10/53] [feature] update readme

---
 model/reward/instructor/README.md        |  25 +++-
 model/reward/instructor/requirements.txt | 140 +++++++++++++++++++++++
 2 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 model/reward/instructor/requirements.txt

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index a8b5ef33..29716dca 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -1,10 +1,31 @@
 # Sections to train Reward Model (RM)
 
+Trainer code based on huggingface. Should be compatible with deepspeed or accelerate
 
-Currently we format 
+
+
+Requirements
+
+```
+wandb
+evaluate
+datasets
+transformers
+torch==1.12
+```
+
+To train your model run this
 
 
 ```bash
+python trainer.py configs/electra-base-dis-webgpt.yml
+```
+
+
+## Dataset
+
+For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here.
+
+
 
 
-```
\ No newline at end of file
diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt
new file mode 100644
index 00000000..9fc45917
--- /dev/null
+++ b/model/reward/instructor/requirements.txt
@@ -0,0 +1,140 @@
+aiohttp==3.8.3
+aiosignal==1.3.1
+anyio==3.6.2
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+arrow==1.2.3
+asttokens==2.2.1
+async-timeout==4.0.2
+attrs==22.2.0
+autopep8==2.0.1
+backcall==0.2.0
+beautifulsoup4==4.11.1
+bleach==5.0.1
+certifi==2022.12.7
+cffi==1.15.1
+charset-normalizer==2.1.1
+click==8.1.3
+comm==0.1.2
+datasets==2.8.0
+debugpy==1.6.4
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.6
+docker-pycreds==0.4.0
+entrypoints==0.4
+evaluate==0.4.0
+exceptiongroup==1.1.0
+executing==1.2.0
+fastjsonschema==2.16.2
+filelock==3.9.0
+fqdn==1.5.1
+frozenlist==1.3.3
+fsspec==2022.11.0
+gitdb==4.0.10
+GitPython==3.1.30
+huggingface-hub==0.11.1
+idna==3.4
+iniconfig==1.1.1
+ipykernel==6.19.4
+ipython==8.7.0
+ipython-genutils==0.2.0
+ipywidgets==8.0.4
+isoduration==20.11.0
+jedi==0.18.2
+Jinja2==3.1.2
+joblib==1.2.0
+jsonpointer==2.3
+jsonschema==4.17.3
+jupyter==1.0.0
+jupyter-console==6.4.4
+jupyter-events==0.5.0
+jupyter_client==7.4.8
+jupyter_core==5.1.1
+jupyter_server==2.0.6
+jupyter_server_terminals==0.4.3
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==3.0.5
+lightning-utilities==0.5.0
+MarkupSafe==2.1.1
+matplotlib-inline==0.1.6
+mistune==2.0.4
+multidict==6.0.4
+multiprocess==0.70.14
+nbclassic==0.4.8
+nbclient==0.7.2
+nbconvert==7.2.7
+nbformat==5.7.1
+nest-asyncio==1.5.6
+notebook==6.5.2
+notebook_shim==0.2.2
+numpy==1.24.1
+packaging==22.0
+pandas==1.5.2
+pandocfilters==1.5.0
+parso==0.8.3
+pathtools==0.1.2
+pexpect==4.8.0
+pickleshare==0.7.5
+platformdirs==2.6.2
+pluggy==1.0.0
+prometheus-client==0.15.0
+promise==2.3
+prompt-toolkit==3.0.36
+protobuf==3.20.1
+psutil==5.9.4
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==10.0.1
+pycodestyle==2.10.0
+pycparser==2.21
+Pygments==2.13.0
+pyrsistent==0.19.3
+pytest==7.2.0
+python-dateutil==2.8.2
+python-json-logger==2.0.4
+pytorch-lightning==1.8.6
+pytz==2022.7
+PyYAML==6.0
+pyzmq==24.0.1
+qtconsole==5.4.0
+QtPy==2.3.0
+regex==2022.10.31
+requests==2.28.1
+responses==0.18.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+scikit-learn==1.2.0
+scipy==1.9.3
+Send2Trash==1.8.0
+sentry-sdk==1.12.1
+setproctitle==1.3.2
+shortuuid==1.0.11
+six==1.16.0
+smmap==5.0.0
+sniffio==1.3.0
+soupsieve==2.3.2.post1
+stack-data==0.6.2
+tensorboardX==2.5.1
+terminado==0.17.1
+threadpoolctl==3.1.0
+tinycss2==1.2.1
+tokenizers==0.13.2
+tomli==2.0.1
+torch==1.12.1+cu116
+torchmetrics==0.11.0
+tornado==6.2
+tqdm==4.64.1
+traitlets==5.8.0
+transformers==4.25.1
+typing_extensions==4.4.0
+uri-template==1.2.0
+urllib3==1.26.13
+wandb==0.13.7
+wcwidth==0.2.5
+webcolors==1.12
+webencodings==0.5.1
+websocket-client==1.4.2
+widgetsnbextension==4.0.5
+xxhash==3.2.0
+yarl==1.8.2

From 8a42ed32950cb7028bb50531281f6c736b2ac4e3 Mon Sep 17 00:00:00 2001
From: mrcabbage972 <mayvic@gmail.com>
Date: Sat, 31 Dec 2022 16:44:04 -0500
Subject: [PATCH 11/53] Adding a file for listing relevant research papers

---
 docs/research/README.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 docs/research/README.md

diff --git a/docs/research/README.md b/docs/research/README.md
new file mode 100644
index 00000000..bf4461e7
--- /dev/null
+++ b/docs/research/README.md
@@ -0,0 +1,21 @@
+# Research
+This page lists research papers that are relevant to the project.
+
+## Automatically Generating Instruction Data for Training
+This line of work is about significantly reducing the need for manually annotated data for the purpose of training [instruction-aligned](https://openai.com/blog/instruction-following/) language models.
+### SELF-INSTRUCT: Aligning Language Model with Self Generated Instructions [[ArXiv](https://arxiv.org/pdf/2212.10560.pdf)], [[Github](https://github.com/yizhongw/self-instruct)].
+
+> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations. 
+> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model. 
+> Applying our method to vanilla GPT3, we demonstrate a 33% absolute improvement over the original model on SuperNaturalInstructions, on par with the performance of InstructGPT-0011, which is trained with private user data and human annotations.
+
+
+### Tuning Language Models with (Almost) No Human Labor. [[ArXiv](https://arxiv.org/pdf/2212.09689.pdf)], [[Github](https://github.com/orhonovich/unnatural-instructions)].
+
+> In this work, we introduce
+Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor. 
+> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth. 
+> This set is then expanded by prompting the model to rephrase each instruction, creating a total of approximately 240,000 examples of instructions, inputs, and outputs.
+> Experiments show that despite containing a fair amount of noise, training on Unnatural Instructions rivals the effectiveness of training
+on open-source manually-curated datasets, surpassing the performance of models such as
+T0++ and Tk-Instruct across various benchmarks.
\ No newline at end of file

From 35f4c2f0d900e5bb1bada6a8748b89f3a2b36367 Mon Sep 17 00:00:00 2001
From: mrcabbage972 <mayvic@gmail.com>
Date: Sat, 31 Dec 2022 19:28:40 -0500
Subject: [PATCH 12/53] Adding missing line break

---
 docs/research/README.md | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/research/README.md b/docs/research/README.md
index bf4461e7..498a858b 100644
--- a/docs/research/README.md
+++ b/docs/research/README.md
@@ -1,21 +1,23 @@
 # Research
+
 This page lists research papers that are relevant to the project.
 
 ## Automatically Generating Instruction Data for Training
+
 This line of work is about significantly reducing the need for manually annotated data for the purpose of training [instruction-aligned](https://openai.com/blog/instruction-following/) language models.
+
 ### SELF-INSTRUCT: Aligning Language Model with Self Generated Instructions [[ArXiv](https://arxiv.org/pdf/2212.10560.pdf)], [[Github](https://github.com/yizhongw/self-instruct)].
 
-> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations. 
-> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model. 
+> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations.
+> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model.
 > Applying our method to vanilla GPT3, we demonstrate a 33% absolute improvement over the original model on SuperNaturalInstructions, on par with the performance of InstructGPT-0011, which is trained with private user data and human annotations.
 
-
 ### Tuning Language Models with (Almost) No Human Labor. [[ArXiv](https://arxiv.org/pdf/2212.09689.pdf)], [[Github](https://github.com/orhonovich/unnatural-instructions)].
 
 > In this work, we introduce
-Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor. 
-> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth. 
+> Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor.
+> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth.
 > This set is then expanded by prompting the model to rephrase each instruction, creating a total of approximately 240,000 examples of instructions, inputs, and outputs.
 > Experiments show that despite containing a fair amount of noise, training on Unnatural Instructions rivals the effectiveness of training
-on open-source manually-curated datasets, surpassing the performance of models such as
-T0++ and Tk-Instruct across various benchmarks.
\ No newline at end of file
+> on open-source manually-curated datasets, surpassing the performance of models such as
+> T0++ and Tk-Instruct across various benchmarks.

From 0119ee666b64b7de779d440976ec367e688a1594 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 02:09:21 +0000
Subject: [PATCH 13/53] [feature] Add support for bloomz

---
 model/reward/instructor/README.md                 | 15 ++++++++++++---
 model/reward/instructor/configs/bloomz-560m.yml   | 10 ++++++++++
 .../configs/electra-base-dis-webgpt.yml           |  3 ++-
 model/reward/instructor/rank_datasets.py          |  8 +++++++-
 model/reward/instructor/utils.py                  | 15 +++++++++++++--
 5 files changed, 44 insertions(+), 7 deletions(-)
 create mode 100644 model/reward/instructor/configs/bloomz-560m.yml

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 29716dca..5992dbc0 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -1,7 +1,6 @@
 # Sections to train Reward Model (RM)
 
-Trainer code based on huggingface. Should be compatible with deepspeed or accelerate
-
+Trainer code based on huggingface. Compatible with deepspeed or accelerate
 
 
 Requirements
@@ -14,7 +13,7 @@ transformers
 torch==1.12
 ```
 
-To train your model run this
+Start training
 
 
 ```bash
@@ -26,6 +25,16 @@ python trainer.py configs/electra-base-dis-webgpt.yml
 
 For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here.
 
+## Model
 
+Check out configs
 
+```
+Open-Assistant/model/reward/instructor/configs/
+    bloomz-560m.yml
+    electra-base-dis-webgpt.yml
+    galactica-125m.yml
+    galactica-1b.yml
+```
 
+You can add new huggingface model as you want.
diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml
new file mode 100644
index 00000000..c8f55746
--- /dev/null
+++ b/model/reward/instructor/configs/bloomz-560m.yml
@@ -0,0 +1,10 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
index 5c02fab7..fc168b63 100644
--- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml
+++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
@@ -1,2 +1,3 @@
-model_name: google/electra-base-discriminator
+model_name: google/electra-large-discriminator
 learning_rate: 3e-5
+max_length: 300
\ No newline at end of file
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index aa77089c..3d122915 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -1,6 +1,12 @@
 '''
     author: theblackcat102
 
+    Dataset output format from __getitem__
+
+     - question / prompt : string
+
+     - answers / rows : list of tuple pair. The first element in the tuple pair must be the positive pair (rank higher than the second element)
+
     A list of rank based dataset for training using rank loss
 
     Some nice features to have
@@ -105,7 +111,7 @@ class HFSummary(Dataset):
 
             >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive
         
-        choice : 0 or 1
+        labeling method : pair comparison, 0 or 1
 
     '''
     def __init__(self, split='train',
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index ef3ed98d..f26add55 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -41,14 +41,16 @@ def train_val_dataset(dataset, val_split=0.2):
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
 def freeze_top_n_layers(model, target_layers):
+    # its possible we can simply detect which module is a ModuleList
+    # and simply freeze the module without doing string parsing
     for name, param in model.named_parameters():
         if 'embed' in name:
             param.requires_grad = False
-        elif '.layer' in name:
+        elif '.layer' in name or '.h.' in name:
             tokens = name.split('.')
             idx = 0
             for token in tokens:
-                if 'layer' in token:
+                if 'layer' in token or token == 'h':
                     break
                 idx += 1
             if idx >= len(tokens):
@@ -56,6 +58,7 @@ def freeze_top_n_layers(model, target_layers):
 
             layer_ = int(tokens[idx+1])
             if layer_ < target_layers:
+                # print('freeze ', layer_, name)
                 param.requires_grad = False
     return model
 
@@ -82,3 +85,11 @@ def argument_parsing(parser):
     params['learning_rate'] = float(params['learning_rate'])
     return params
 
+
+
+if __name__ == "__main__":
+    from transformers import AutoModelForSequenceClassification
+
+    model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m')
+    freeze_top_n_layers(model, 10)
+    print(model.state_dict().keys())
\ No newline at end of file

From e27a3eb3c75e6b3193e712e3cfd76298e0dc6bc6 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 02:22:57 +0000
Subject: [PATCH 14/53] [fix] Tidy up todo and trainer comments

---
 model/reward/instructor/TODO.md                     | 13 ++++++++++++-
 .../instructor/configs/bloomz-560m-summary.yml      |  9 +++++++++
 model/reward/instructor/trainer.py                  |  2 +-
 model/utils.py                                      |  4 ----
 4 files changed, 22 insertions(+), 6 deletions(-)
 create mode 100644 model/reward/instructor/configs/bloomz-560m-summary.yml
 delete mode 100644 model/utils.py

diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index 33bc6595..ec23b7c3 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -1,12 +1,23 @@
 
 Some other reward features we can use
 
+0. Finish classification feature
 
-Summaries from human feedback
+1. Summaries from human feedback
 
 * use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
 
 * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
 
+* Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)
+
+    * this should be placed under experimental_dataset.py
+
+
+2. Add support for anthropic dataset
+
+* the anthropic dataset is more like a conversation tree, which is much more complex than a simple question-answer schema
+
+    * this is basically a MCTS from alphazero.
 
 
diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml
new file mode 100644
index 00000000..a02f4e4a
--- /dev/null
+++ b/model/reward/instructor/configs/bloomz-560m-summary.yml
@@ -0,0 +1,9 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 22baf130..de0b011a 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -92,7 +92,7 @@ class RankTrainer(Trainer):
 
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
-    
+
     model_name = training_conf['model_name']
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
     if 'freeze_layer' in training_conf:
diff --git a/model/utils.py b/model/utils.py
deleted file mode 100644
index 579b3f6e..00000000
--- a/model/utils.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from transformers import AutoTokenizer
-
-
-def update_galactica_tokenizer():
\ No newline at end of file

From a5a2625e2d15f327d89ee89708284971ba96e59f Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 02:55:54 +0000
Subject: [PATCH 15/53] [merge] most of the bugs should be fixed. #77

---
 model/reward/instructor/cls_dataset.py            | 15 ++++-----------
 .../test-galactica-125m-classification.yml        | 14 ++++++++++++++
 model/reward/instructor/rank_datasets.py          |  8 ++++++--
 model/reward/instructor/trainer.py                |  5 ++++-
 4 files changed, 28 insertions(+), 14 deletions(-)
 create mode 100644 model/reward/instructor/configs/test-galactica-125m-classification.yml

diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py
index 54bbd19e..ff824d19 100644
--- a/model/reward/instructor/cls_dataset.py
+++ b/model/reward/instructor/cls_dataset.py
@@ -24,20 +24,10 @@ class WebGPTDataset(Dataset):
         '''
         os.makedirs('dataset', exist_ok=True)
         dataset = load_dataset("openai/webgpt_comparisons")
-        if os.path.exists(index_cache):
-            train_idx = torch.load(index_cache)
-        else:
-            train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False)
-            torch.save(set(train_idx.tolist()), index_cache)
         self.dataset = []
         self.dataset_index = []
         for idx, row in enumerate(dataset['train']):
-            if mode == 'train' and idx in train_idx:
-                self.dataset.append(webgpt_return_format(row))
-                self.dataset_index.append(idx)
-            elif idx not in train_idx and mode != 'train':
-                self.dataset.append(webgpt_return_format(row))
-                self.dataset_index.append(idx)
+            self.dataset.append(webgpt_return_format(row))
 
         # since this dataset was generated from 176B GPT-3
         # we needed some more sample generated from the starting model
@@ -71,3 +61,6 @@ class WebGPTDataset(Dataset):
 
         gen_neg = random.choice(self.additional[self.dataset_index[index]])
         return row['question'], row['pos'], row['neg'], gen_neg
+
+
+
diff --git a/model/reward/instructor/configs/test-galactica-125m-classification.yml b/model/reward/instructor/configs/test-galactica-125m-classification.yml
new file mode 100644
index 00000000..1ad1f47c
--- /dev/null
+++ b/model/reward/instructor/configs/test-galactica-125m-classification.yml
@@ -0,0 +1,14 @@
+model_name: facebook/galactica-125m
+learning_rate: 1e-5
+gradient_checkpointing: false
+gradient_accumulation_steps: 10
+per_device_train_batch_size: 6
+warmup_steps: 600
+loss: cls
+eval_steps: 200
+save_steps: 500
+max_length: 128
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 3d122915..4ba6293c 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -11,7 +11,11 @@
 
     Some nice features to have
 
-    [ ] 
+    [] support additional negative samples generated from other models.
+
+        For example, we can use galactica-125m to generate a TL;DR and assume it is
+        inferior to the human-preferred one
+
 
 '''
 from typing import Optional, Union
@@ -35,7 +39,7 @@ class DataCollatorForPairRank:
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False
+    drop_token_type: bool = False # galactica
 
     def __call__(self, features):
 
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index de0b011a..48fc4e8d 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -77,7 +77,10 @@ class RankTrainer(Trainer):
 
         return loss, logits
 
-    def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(self, model: nn.Module,
+            inputs: Dict[str, Union[torch.Tensor, Any]],
+            prediction_loss_only: bool,
+            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
 
         with torch.no_grad():
             # compute loss on predict data

From 4b7f1f25a138e614ab9f385f08913878a8a21bbb Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 03:07:40 +0000
Subject: [PATCH 16/53] [fix] Use official split for eval

---
 model/reward/instructor/trainer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 48fc4e8d..391464c6 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -134,12 +134,14 @@ if __name__ == "__main__":
         train_datasets.append(train)
         evals['webgpt'] = eval
     if 'hfsummary' in training_conf['datasets']:
-        summary_dataset = HFSummary()
-        sum_train, sum_eval = train_val_dataset(summary_dataset)
+        sum_train = HFSummary(split='train')
         train_datasets.append(sum_train)
+        sum_eval = HFSummary(split='valid1')
+        assert len(sum_eval) > 0
         evals['hfsummary'] = sum_eval
     train = ConcatDataset(train_datasets)
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name)
+    assert len(evals) > 0
     trainer = RankTrainer(
         model,
         args,

From d7c049560566ef1fcd2bf9ed4f4543e95f24fa95 Mon Sep 17 00:00:00 2001
From: Keith Stevens <fozziethebeat@gmail.com>
Date: Sun, 1 Jan 2023 12:48:05 +0900
Subject: [PATCH 17/53] Deleting a few links that don't go to a page we're
 planning to build out.  Updating two links to go to pages that will soon
 exist

---
 website/src/components/Footer.tsx        | 23 ++++++++++-------------
 website/src/components/Header/Header.tsx |  7 -------
 website/src/pages/index.tsx              |  1 -
 3 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/website/src/components/Footer.tsx b/website/src/components/Footer.tsx
index a07ba24a..5c774398 100644
--- a/website/src/components/Footer.tsx
+++ b/website/src/components/Footer.tsx
@@ -20,24 +20,21 @@ export function Footer() {
               </div>
             </div>
             <nav className="flex justify-center gap-20">
-              <div className="flex flex-col text-sm leading-7">
-                <b>Information</b>
-                <div className="flex flex-col leading-5">
-                  <Link href="#" aria-label="Our Team" className="hover:underline underline-offset-2">
-                    Our Team
-                  </Link>
-                  <Link href="/#join-us" aria-label="Join Us" className="hover:underline underline-offset-2">
-                    Join Us
-                  </Link>
-                </div>
-              </div>
               <div className="flex flex-col text-sm leading-7">
                 <b>Legal</b>
                 <div className="flex flex-col leading-5">
-                  <Link href="#" aria-label="Privacy Policy" className="hover:underline underline-offset-2">
+                  <Link
+                    href="/privacy-policy"
+                    aria-label="Privacy Policy"
+                    className="hover:underline underline-offset-2"
+                  >
                     Privacy Policy
                   </Link>
-                  <Link href="#" aria-label="Terms of Service" className="hover:underline underline-offset-2">
+                  <Link
+                    href="/terms-of-service"
+                    aria-label="Terms of Service"
+                    className="hover:underline underline-offset-2"
+                  >
                     Terms of Service
                   </Link>
                 </div>
diff --git a/website/src/components/Header/Header.tsx b/website/src/components/Header/Header.tsx
index b1cbb94d..e4965807 100644
--- a/website/src/components/Header/Header.tsx
+++ b/website/src/components/Header/Header.tsx
@@ -65,9 +65,6 @@ export function Header(props) {
               <Image src="/images/logos/logo.svg" className="mx-auto object-fill" width="50" height="50" alt="logo" />
               <span className="text-2xl font-bold ml-3">Open Assistant</span>
             </Link>
-            <div className="hidden lg:flex lg:gap-10">
-              <NavLinks />
-            </div>
           </div>
           <div className="flex items-center gap-4">
             <Popover className="lg:hidden">
@@ -102,10 +99,6 @@ export function Header(props) {
                           }}
                           className="absolute inset-x-0 top-0 z-0 origin-top rounded-b-2xl bg-white px-6 pb-6 pt-32 shadow-2xl shadow-gray-900/20"
                         >
-                          <div className="space-y-4">
-                            <MobileNavLink href="/#join-us">Join Us</MobileNavLink>
-                            <MobileNavLink href="/#faqs">FAQs</MobileNavLink>
-                          </div>
                           <div className="mt-8 flex flex-col gap-4"></div>
                         </Popover.Panel>
                       </>
diff --git a/website/src/pages/index.tsx b/website/src/pages/index.tsx
index 722abd35..8c2c34b5 100644
--- a/website/src/pages/index.tsx
+++ b/website/src/pages/index.tsx
@@ -27,7 +27,6 @@ const Home = () => {
         <main>
           <Hero />
           <CallToAction />
-
           <Faq />
         </main>
       )}

From 165a1c6b62a64efa2e5bde8b1af413c14c0b78e3 Mon Sep 17 00:00:00 2001
From: mattlongname <5335654+mattlongname@users.noreply.github.com>
Date: Sat, 31 Dec 2022 22:11:10 -0600
Subject: [PATCH 18/53] candidate platform agnostic solution

---
 .pre-commit-config.yaml |  5 +++--
 website/next-lint.js    | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100755 website/next-lint.js

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 10578122..271c11c6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,7 +57,8 @@ repos:
       - id: next-lint-website
         name: Lint website
         files: ^website/
+        exclude: ^website/node_modules/
         types_or: [javascript, jsx, ts, tsx]
-        language: system
+        language: node
         pass_filenames: false
-        entry: bash -c "cd website && npm install && npm run lint"
+        entry: website/next-lint.js
diff --git a/website/next-lint.js b/website/next-lint.js
new file mode 100755
index 00000000..0b3a5c90
--- /dev/null
+++ b/website/next-lint.js
@@ -0,0 +1,24 @@
+#!/usr/bin/env node
+const { spawnSync } = require("child_process");
+async function npmLint() {
+  const spawnOption = {
+    shell: true,
+    env: process.env,
+    stdio: "inherit",
+    cwd: "./website",
+  };
+  let npmInstall;
+  let npmRunLint;
+  try {
+    npmInstall = await spawnSync("npm", ["install"], spawnOption);
+    if (npmInstall.status !== 0) {
+      process.exit(npmInstall.status);
+    }
+    npmRunLint = await spawnSync("npm", ["run lint"], spawnOption);
+    process.exit(npmRunLint.status);
+  } catch (error) {
+    console.error(error);
+    process.exit(1);
+  }
+}
+npmLint();

From 8b1553642f8f51e6d61f05a9a8c9302691d1ef25 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 08:22:30 +0000
Subject: [PATCH 19/53] [feature] remove dependency to download hfsummary
 manually

---
 .../reward/instructor/experimental_dataset.py |  1 +
 model/reward/instructor/rank_datasets.py      | 58 +++++++++----------
 model/reward/instructor/tests/test_dataset.py |  9 +--
 3 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index f705ccf6..85f0c899 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -17,3 +17,4 @@ from dataset import load_dataset
 from torch.utils.data import Dataset
 
 
+class 
\ No newline at end of file
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 4ba6293c..2f2260c2 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -112,51 +112,49 @@ class HFSummary(Dataset):
     '''
         Human feedback data from OpenAI
         https://github.com/openai/summarize-from-feedback
-
-            >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive
         
         labeling method : pair comparison, 0 or 1
 
     '''
     def __init__(self, split='train',
-        path='summarize-from-feedback/comparisons/*.json',
         conf_threshold=-1,
-        max_comparison_per_sample=5) -> None:
+        max_comparison_per_sample=3) -> None:
         super().__init__()
-        assert split in ('train', 'valid1', 'valid2', 'test')
+        assert split in ('train', 'validation')
         summaries = {}
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2summary = {}
         self.max_comparison_per_sample = max_comparison_per_sample
-        for jsonl_file in glob.glob(path):
-            with open(jsonl_file, 'r') as f:
-                for line in f:
-                    data = json.loads(line)
-                    if data['split'] != split:
-                        continue
-                    if 'extra' in data and \
-                        'confidence' in data['extra'] and \
-                        conf_threshold > data['extra']['confidence']:
-                        print('skipping {}'.format(data['info']['id']))
-                        continue
+        dataset = load_dataset('Tristan/summarize_from_feedback', 'comparisons')[split]
+        for data in dataset:
+            if 'extra' in data and \
+                'confidence' in data['extra'] and \
+                data['extra']['confidence'] is not None and \
+                conf_threshold > data['extra']['confidence']:
+                print('skipping {}'.format(data['info']['id']))
+                continue
 
-                    if 'article' in data['info']:
-                        context = data['info']['article']
-                    elif 'post' in data['info']:
-                        context = data['info']['post']
+            if 'article' in data['info'] and \
+                data['info']['article'] is not None:
+                context = data['info']['article']
+            elif 'post' in data['info']:
+                context = data['info']['post']
 
-                    if context not in self.index2summary:
-                        self.index2summary[len(self.index2summary)] = context
-                    
-                    if context not in summaries:
-                        summaries[context] = []
+            if context is None:
+                continue
 
-                    pos, neg = (0, 1) if data['choice'] == 0 else (1, 0)
-                    summaries[context].append((
-                        data['summaries'][pos]['text'],
-                        data['summaries'][neg]['text']
-                    ))
+            if context not in self.index2summary:
+                self.index2summary[len(self.index2summary)] = context
+            
+            if context not in summaries:
+                summaries[context] = []
+
+            pos, neg = (0, 1) if data['choice'] == 0 else (1, 0)
+            summaries[context].append((
+                data['summaries'][pos]['text'],
+                data['summaries'][neg]['text']
+            ))
 
         self.summaries = summaries
 
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index c452786b..7b432fd3 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -1,22 +1,23 @@
 from transformers import AutoTokenizer
 from torch.utils.data import DataLoader
-from rank_datasets import WebGPT, HFSummary, DataCollatorForMultipleChoice
+from rank_datasets import WebGPT, HFSummary, DataCollatorForPairRank
 
 
 def test_hfsummary():
     
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
-    collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200)
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
     dataset = HFSummary()
+    print(len(dataset))
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
     for batch in dataloader:
-        print(batch['input_ids'].shape)
+        batch['input_ids'].shape
  
 
 def test_webgpt():
     
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
-    collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200)
+    collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
     dataset = WebGPT()
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:

From 1197dccf11cc1eac750109b6f856969ab4db8361 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 08:25:09 +0000
Subject: [PATCH 20/53] [fix] dataset split name

---
 model/reward/instructor/rank_datasets.py      | 8 ++++++--
 model/reward/instructor/tests/test_dataset.py | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index 2f2260c2..c2b7e58f 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -120,13 +120,14 @@ class HFSummary(Dataset):
         conf_threshold=-1,
         max_comparison_per_sample=3) -> None:
         super().__init__()
-        assert split in ('train', 'validation')
+        assert split in ('train', 'valid1', 'valid2', 'test')
         summaries = {}
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2summary = {}
         self.max_comparison_per_sample = max_comparison_per_sample
-        dataset = load_dataset('Tristan/summarize_from_feedback', 'comparisons')[split]
+        major_split = split if 'train' == split else 'validation'
+        dataset = load_dataset('Tristan/summarize_from_feedback', 'comparisons')[major_split]
         for data in dataset:
             if 'extra' in data and \
                 'confidence' in data['extra'] and \
@@ -135,6 +136,9 @@ class HFSummary(Dataset):
                 print('skipping {}'.format(data['info']['id']))
                 continue
 
+            if split != 'train' and split != data['split']:
+                continue
+
             if 'article' in data['info'] and \
                 data['info']['article'] is not None:
                 context = data['info']['article']
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index 7b432fd3..5765cd43 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -7,7 +7,7 @@ def test_hfsummary():
     
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
-    dataset = HFSummary()
+    dataset = HFSummary('train')
     print(len(dataset))
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
     for batch in dataloader:

From 168e9ca6b3863fa7cf09691e04ce0a575c537bfd Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 10:19:25 +0000
Subject: [PATCH 21/53] [feature] added summary quality rater

---
 model/reward/instructor/README.md             |  18 ++-
 model/reward/instructor/TODO.md               |   2 +-
 .../reward/instructor/experimental_dataset.py |  90 +++++++++++-
 model/reward/instructor/rank_datasets.py      |   4 -
 .../instructor/summary_quality_trainer.py     | 132 ++++++++++++++++++
 model/reward/instructor/tests/test_dataset.py |  15 +-
 model/reward/instructor/trainer.py            |   4 +-
 model/reward/instructor/utils.py              |   1 +
 8 files changed, 251 insertions(+), 15 deletions(-)
 create mode 100644 model/reward/instructor/summary_quality_trainer.py

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 5992dbc0..31c25371 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -13,7 +13,7 @@ transformers
 torch==1.12
 ```
 
-Start training
+Start training reward model
 
 
 ```bash
@@ -21,6 +21,22 @@ python trainer.py configs/electra-base-dis-webgpt.yml
 ```
 
 
+Additional axis labeling: this outputs four summary quality evaluation metrics (scores are normalized to 0-1)
+
+```bash
+python summary_quality_trainer.py configs/test-bloomz-560m-quality.yml
+```
+
+The four metrics are:
+
+* overall
+
+* accuracy
+
+* coverage
+
+* coherence
+
 ## Dataset
 
 For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here.
diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index ec23b7c3..1e653922 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -9,7 +9,7 @@ Some other reward features we can use
 
 * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
 
-* Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)
+* ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)~~
 
     * this should be placed under experimental_dataset.py
 
diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index 85f0c899..47d20d64 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -8,13 +8,93 @@
         Should be better than just a preference score
 
 '''
-import os
-import json
-import random
 import torch
+from typing import Optional, Union
 import numpy as np
-from dataset import load_dataset
+from collections import defaultdict
+from datasets import load_dataset
+from dataclasses import dataclass
 from torch.utils.data import Dataset
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+
+
+@dataclass
+class DataCollatorForSummaryScore:
+    """
+
+    Data collator that will dynamically pad the inputs for multiple choice received.
+
+    """
+    tokenizer: PreTrainedTokenizerBase
+    num_choices: int = 2
+    padding: Union[bool, str, PaddingStrategy] = True
+    max_length: Optional[int] = None
+    pad_to_multiple_of: Optional[int] = None
+    drop_token_type: bool = False # galactica
+
+    def __call__(self, batch):
+
+        features = []
+        labels = []
+        for feature, label in batch:
+            features.append(feature)
+            labels.append(label)
+
+        batch_feature = self.tokenizer.pad(
+            features,
+            padding=self.padding,
+            max_length=self.max_length,
+            pad_to_multiple_of=self.pad_to_multiple_of,
+            return_tensors="pt",
+        )
+        if self.drop_token_type:
+            batch_feature.pop('token_type_ids')
+        # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
+        batch_feature['labels'] = torch.from_numpy(np.array(labels)).float()
+        return batch_feature
+
+
+class HFSummaryQuality(Dataset):
+    def __init__(self, split, tokenizer, max_length=300) -> None:
+        super().__init__()
+        assert split in ('validation', 'test')
+        dataset = load_dataset('Tristan/summarize_from_feedback', 'axis')[split]
+        self.max_length = max_length
+        mean_scores = defaultdict(list)
+        self.contexts = []
+        self.responses = []
+        self.labels = []
+        for data in dataset:
+
+            if 'article' in data['info'] and \
+                data['info']['article'] is not None:
+                context = data['info']['article']
+            elif 'post' in data['info']:
+                context = data['info']['post']
+            self.contexts.append(context)
+
+            response = data['summary']['text']
+            self.responses.append(response)
+            self.labels.append(data['summary']['axes'])
+            for axis, score in data['summary']['axes'].items():
+                if score is not None:
+                    mean_scores[axis].append(score)
+
+        self.label2idx = { key: idx for idx, key in enumerate(mean_scores.keys()) }
+        self.label2mean = { key: np.mean(scores) for key, scores in mean_scores.items() }
+        self.tokenizer = tokenizer
+        print(self.label2idx)
+
+    def __len__(self):
+        return len(self.responses)
+
+    def __getitem__(self, index):
+        context = self.contexts[index]
+        # return pairs of comparison
+        response = self.responses[index]
+        labels = np.zeros(len(self.label2idx))
+        for key, score in self.labels[index].items():
+            labels[self.label2idx[key]] = (self.label2mean[key] if score is None else score)/10
+        return self.tokenizer(context, response, truncation=True, max_length=self.max_length), labels
 
 
-class 
\ No newline at end of file
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index c2b7e58f..f38885e4 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -19,8 +19,6 @@
 
 '''
 from typing import Optional, Union
-import glob
-import json
 from dataclasses import dataclass
 import numpy as np
 from torch.utils.data import Dataset
@@ -145,8 +143,6 @@ class HFSummary(Dataset):
             elif 'post' in data['info']:
                 context = data['info']['post']
 
-            if context is None:
-                continue
 
             if context not in self.index2summary:
                 self.index2summary[len(self.index2summary)] = context
diff --git a/model/reward/instructor/summary_quality_trainer.py b/model/reward/instructor/summary_quality_trainer.py
new file mode 100644
index 00000000..a6604819
--- /dev/null
+++ b/model/reward/instructor/summary_quality_trainer.py
@@ -0,0 +1,132 @@
+import os
+os.environ['WANDB_PROJECT'] = 'quality-scoring'
+import torch
+import yaml
+import evaluate
+from typing import Any, Callable, List, Optional, Tuple, Union, Dict
+from torch import nn
+from argparse import ArgumentParser
+import numpy as np
+from torch.utils.data import Dataset
+from transformers import AutoModelForSequenceClassification
+from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
+from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
+from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
+
+parser = ArgumentParser()
+parser.add_argument('config', type=str)
+
+accuracy = evaluate.load("mse")
+def compute_metrics(eval_pred):
+    predictions, labels = eval_pred
+    return accuracy.compute(predictions=predictions.flatten(), references=labels.flatten())
+
+
+class QualityTrainer(Trainer):
+    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
+                 args: TrainingArguments = None,
+                 data_collator: Optional[DataCollator] = None,
+                 train_dataset: Optional[Dataset] = None,
+                 eval_dataset: Optional[Dataset] = None,
+                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
+                 model_init: Callable[[], PreTrainedModel] = None,
+                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+                 callbacks: Optional[List[TrainerCallback]] = None,
+                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
+        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
+                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
+        self.loss_fct = nn.L1Loss()
+        self.sigmoid = nn.Sigmoid()
+
+    def compute_loss(self, model, inputs, return_outputs=False):
+        labels = inputs.pop('labels')
+        # forward pass
+        outputs = model(**inputs)
+        logits = self.sigmoid(outputs.get("logits"))
+        loss = self.loss_fct(logits, labels)
+
+        return (loss, outputs) if return_outputs else loss
+
+    def _compute_loss(self, model, inputs):
+        inputs = self._prepare_inputs(inputs)
+        labels = inputs.pop('labels')
+        outputs = model(**inputs)
+        logits = self.sigmoid(outputs.get("logits"))
+        loss = self.loss_fct(logits, labels)
+
+        return loss, logits
+
+    def prediction_step(self, model: nn.Module,
+            inputs: Dict[str, Union[torch.Tensor, Any]],
+            prediction_loss_only: bool,
+            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+
+        with torch.no_grad():
+            # compute loss on predict data
+            loss, logits = self._compute_loss(model, inputs)
+
+        loss = loss.mean().detach()
+        labels = inputs['labels']
+        if self.args.prediction_loss_only:
+            return (loss, None, None)
+
+        return (loss, logits, labels)
+
+if __name__ == "__main__":
+    training_conf = argument_parsing(parser)
+
+    model_name = training_conf['model_name']
+    tokenizer = get_tokenizer(model_name)
+    collate_fn = DataCollatorForSummaryScore(tokenizer,
+        max_length=training_conf['max_length'],
+        drop_token_type= 'galactica' in model_name
+    )
+    train = HFSummaryQuality(split='validation',
+        tokenizer=tokenizer,
+        max_length=training_conf['max_length']
+        )
+    eval = HFSummaryQuality(split='test',
+            tokenizer=tokenizer,
+            max_length=training_conf['max_length']
+        )
+    model = AutoModelForSequenceClassification.from_pretrained(model_name,
+        num_labels=len(train.label2idx), problem_type='regression')
+
+    if 'freeze_layer' in training_conf:
+        num_layer = training_conf['freeze_layer']
+        model = freeze_top_n_layers(model, num_layer)
+        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
+        params = sum([np.prod(p.size()) for p in model_parameters])
+        print('Number of trainable : {}M'.format(int(params/1e6)))
+
+    args = TrainingArguments(
+        output_dir=f"{model_name}-finetuned",
+        num_train_epochs=training_conf['num_train_epochs'],
+        warmup_steps=500,
+        learning_rate=training_conf['learning_rate'],
+        # half_precision_backend="apex",
+        fp16=True,
+        gradient_checkpointing=training_conf['gradient_checkpointing'],
+        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
+        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
+        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
+        weight_decay=0.01,
+        max_grad_norm=2.0,
+        logging_steps=10,
+        save_total_limit=4,
+        evaluation_strategy='steps',
+        eval_steps=training_conf['eval_steps'],
+        save_steps=1000,
+        report_to='wandb'
+    )
+    trainer = QualityTrainer(
+        model,
+        args,
+        train_dataset=train,
+        eval_dataset=eval,
+        data_collator=collate_fn,
+        tokenizer=tokenizer,
+        compute_metrics=compute_metrics
+    )
+    trainer.train()
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index 5765cd43..271db83c 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -1,7 +1,7 @@
 from transformers import AutoTokenizer
 from torch.utils.data import DataLoader
 from rank_datasets import WebGPT, HFSummary, DataCollatorForPairRank
-
+from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
 
 def test_hfsummary():
     
@@ -24,6 +24,17 @@ def test_webgpt():
         print(batch['input_ids'].shape)
 
 
+def test_hf_quality():
+
+    tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
+    collate_fn = DataCollatorForSummaryScore(tokenizer, max_length=200)
+    dataset = HFSummaryQuality('validation', tokenizer)
+    dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
+    for batch in dataloader:
+        print(batch['input_ids'].shape)
+
+
+
 if __name__ == "__main__":
-    test_hfsummary()
+    test_hf_quality()
     # test_webgpt()
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 391464c6..c8063cf7 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -117,13 +117,13 @@ if __name__ == "__main__":
         gradient_checkpointing=training_conf['gradient_checkpointing'],
         gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
         per_device_train_batch_size=training_conf['per_device_train_batch_size'],
-        per_device_eval_batch_size=5,
+        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
         evaluation_strategy='steps',
-        eval_steps=500,
+        eval_steps=training_conf['eval_steps'],
         save_steps=1000,
         report_to='wandb'
     )
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index f26add55..d59bb13c 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -69,6 +69,7 @@ def argument_parsing(parser):
         'eval_steps': 500,
         'loss': 'rank',
         'max_length': 440,
+        'per_device_eval_batch_size': 5,
         'per_device_train_batch_size': 8,
         'gradient_accumulation_steps': 8,
         'gradient_checkpointing': False,

From f15f2c29ed4a106a0c921113671f910f57b19412 Mon Sep 17 00:00:00 2001
From: Alexander Goryunov <alex.goryunov@gmail.com>
Date: Sun, 1 Jan 2023 12:49:05 +0200
Subject: [PATCH 22/53] Fixed text formatting and made code more Pythonic

---
 .../EssayInstructions.ipynb                   | 277 +++++++++++++-----
 1 file changed, 198 insertions(+), 79 deletions(-)

diff --git a/notebooks/data-argumentation/EssayInstructions.ipynb b/notebooks/data-argumentation/EssayInstructions.ipynb
index ec534887..d267ef03 100644
--- a/notebooks/data-argumentation/EssayInstructions.ipynb
+++ b/notebooks/data-argumentation/EssayInstructions.ipynb
@@ -1,135 +1,201 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
   "cells": [
     {
       "cell_type": "code",
-      "source": [
-        "!pip install transformers"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "8zsmJ96eaL2w"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "!pip install transformers"
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "Pt6qbTsjW7Kp"
+      },
       "source": [
         "Put your essay here, [source of the essay used ](https://https://www.thewisdompost.com/essay/technology-essay/3387#essay-on-technology-for-college-and-university-students-essay-2-750-words)\n",
         "\n",
-        "Saperate paragraphs with one blank line\n",
+        "Separate paragraphs with one blank line\n",
         "(this step is annoying but important)\n"
-      ],
-      "metadata": {
-        "id": "Pt6qbTsjW7Kp"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "execution_count": null,
+      "execution_count": 2,
       "metadata": {
         "id": "d_5_BDFNWneB"
       },
       "outputs": [],
       "source": [
         "essay = \"\"\"\n",
-        "We live in a world driven by technology — hardly anyone would argue with you if you said this. Technology, literally meaning the “science of craft”, refers to the collection of techniques, skills, methods, and processes used to produce goods or services or for accomplishing objectives such as scientific investigation. Technology can be embedded in machines enabling them to be used by people even without a detailed knowledge of their inner workings.\n",
-        "Technological growth is closely linked to the expansion of scientific research and knowledge. In the last 50 years, thanks to the exponential increases in computing power and microchip design and manufacture, there has been unprecedented innovation and technological growth in nearly every field of human endeavour from health and transport to industrial production and education.\n",
+        "We live in a world driven by technology — hardly anyone would argue with you if you said this. \n",
+        "Technology, literally meaning the “science of craft”, refers to the collection of techniques, \n",
+        "skills, methods, and processes used to produce goods or services or for accomplishing objectives \n",
+        "such as scientific investigation. Technology can be embedded in machines enabling them to be \n",
+        "used by people even without a detailed knowledge of their inner workings. Technological growth \n",
+        "is closely linked to the expansion of scientific research and knowledge. In the last 50 years, \n",
+        "thanks to the exponential increases in computing power and microchip design and manufacture, \n",
+        "there has been unprecedented innovation and technological growth in nearly every field of human \n",
+        "endeavour from health and transport to industrial production and education.\n",
         "\n",
-        "It is automotive technology that drives today’s electric and hybrid cars, and which will drive tomorrow’s driverless cars, hover-taxis and space cabs.\n",
-        "It is technology that drives the ubiquitous mobile phones that you will now find in the hands of even the poorest of the world’s poor. It is technology that creates hybrid seeds that resist inhospitable climatic conditions and difficult terrain, giving high yields in shorter times.\n",
-        "It is advancing medical technology that makes remote surgery, minimally invasive surgery and life-saving cures using stem cell transplants. Technology puts spacecrafts on asteroids and distant planets and lets us see new worlds. Technology splits atoms, revealing their secrets, and gives us ways to exploit them to create energy, quantum storage for data, and virtual reality games.\n",
+        "It is automotive technology that drives today’s electric and hybrid cars, and which will drive \n",
+        "tomorrow’s driverless cars, hover-taxis and space cabs. It is technology that drives the \n",
+        "ubiquitous mobile phones that you will now find in the hands of even the poorest of the world’s \n",
+        "poor. It is technology that creates hybrid seeds that resist inhospitable climatic conditions \n",
+        "and difficult terrain, giving high yields in shorter times. It is advancing medical technology \n",
+        "that makes remote surgery, minimally invasive surgery and life-saving cures using stem cell \n",
+        "transplants. Technology puts spacecrafts on asteroids and distant planets and lets us see \n",
+        "new worlds. Technology splits atoms, revealing their secrets, and gives us ways to exploit \n",
+        "them to create energy, quantum storage for data, and virtual reality games.\n",
         "\n",
-        "There are people who strongly oppose technology and claim that it spells the death of ‘humanity’, and that we are approaching the day when machines will rule everything. They refer to fans of technology as ‘techies’ or sometimes ‘geeks’. On the other hand, proponents of technology call these people Luddites, a derogatory name for someone who is opposed to industrialisation, automation, computerisation and new technologies in general.\n",
-        "Is this true? Is technology really a curse disguised as a blessing? Many believe that the convergence of biotechnology and AI might be the most consequential development of all.\n",
+        "There are people who strongly oppose technology and claim that it spells the death of \n",
+        "‘humanity’, and that we are approaching the day when machines will rule everything. They refer \n",
+        "to fans of technology as ‘techies’ or sometimes ‘geeks’. On the other hand, proponents of \n",
+        "technology call these people Luddites, a derogatory name for someone who is opposed to \n",
+        "industrialisation, automation, computerisation and new technologies in general.\n",
+        "Is this true? Is technology really a curse disguised as a blessing? Many believe that the \n",
+        "convergence of biotechnology and AI might be the most consequential development of all.\n",
         "\n",
-        "In the last five decades, two areas in particular have grown faster than the rest, powered by research and advances in computing power. One is artificial intelligence, or AI; the other is biotechnology. Huge benefits have emerged from each of them for human beings in general, such as self-driving cars — which will dramatically reduce the death rate from road accidents — and robotic surgery, which enables precise, highly efficient and targeted surgical interventions.\n",
-        "Yet, visionaries like Yuval Noah Harari, author of the best-selling Homo sapiens and Deus, are now warning that the convergence of biotechnology and AI will irreversibly and unpredictably change both the quality of human life and its challenges in the next few decades. A good example of this is the facial recognition technology that is now present in all photo management programs. The AI in the software is capable of not only spotting the faces in every photograph but also recognising the person by name.\n",
-        "This technology has now expanded so that photo apps can recognise cats, dogs, beaches, mountains and cars too. Computers with AI are already correctly identifying human emotions through observing facial expressions and body movements. Some robots are able to mimic human emotions. This is called affective computing, sometimes called artificial emotional intelligence, and refers to the study and development of systems and devices that can recognize, interpret, process, and simulate human affects.\n",
+        "In the last five decades, two areas in particular have grown faster than the rest, powered \n",
+        "by research and advances in computing power. One is artificial intelligence, or AI; the other \n",
+        "is biotechnology. Huge benefits have emerged from each of them for human beings in general, \n",
+        "such as self-driving cars — which will dramatically reduce the death rate from road accidents \n",
+        "— and robotic surgery, which enables precise, highly efficient and targeted surgical \n",
+        "interventions. Yet, visionaries like Yuval Noah Harari, author of the best-selling \"Homo \n",
+        "Sapiens\" and \"Deus\", are now warning that the convergence of biotechnology and AI will \n",
+        "irreversibly and unpredictably change both the quality of human life and its challenges in \n",
+        "the next few decades. A good example of this is the facial recognition technology that is \n",
+        "now present in all photo management programs. The AI in the software is capable of not \n",
+        "only spotting the faces in every photograph but also recognising the person by name.\n",
+        "This technology has now expanded so that photo apps can recognise cats, dogs, beaches, \n",
+        "mountains and cars too. Computers with AI are already correctly identifying human emotions \n",
+        "through observing facial expressions and body movements. Some robots are able to mimic \n",
+        "human emotions. This is called affective computing, sometimes called artificial emotional \n",
+        "intelligence, and refers to the study and development of systems and devices that can \n",
+        "recognize, interpret, process, and simulate human affects.\n",
         "\n",
         "How could this be a negative?\n",
-        "The ability to read human emotions is just a step away from predicting human emotions. For example, if a computer attached to a video camera could identify which products a consumer is showing greater interest in or which ones he is really keen to buy, various tactics could be used to influence her to buy it.\n",
-        "Activists worry that computers that can understand and anticipate human wishes and desires by scanning their irises and analysing their micro-expressions could also be programmed to exploit and manipulate them.\n",
-        "Another very real fear is that humanoid computers with human-like skin, speech, and expressions could jeopardise and dehumanise relationship and create emotional vacuums.\n",
+        "The ability to read human emotions is just a step away from predicting human emotions. For \n",
+        "example, if a computer attached to a video camera could identify which products a consumer \n",
+        "is showing greater interest in or which ones he is really keen to buy, various tactics \n",
+        "could be used to influence her to buy it. Activists worry that computers that can understand \n",
+        "and anticipate human wishes and desires by scanning their irises and analysing their \n",
+        "micro-expressions could also be programmed to exploit and manipulate them. Another very real \n",
+        "fear is that humanoid computers with human-like skin, speech, and expressions could jeopardise \n",
+        "and dehumanise relationship and create emotional vacuums.\n",
         "\n",
-        "An enduring fear of Luddites has always been that computers will rob humans of their livelihood by taking their jobs and doing them more efficiently at lower cost. However, in reality the exact opposite has happened. As computerised machines began taking over mechanical and repetitive human activities, new jobs for people opened up that needs thinking and analytical skills and judgement, or human interpersonal skills. A good example is the worldwide proliferation of call centres.\n",
-        "When drones were invented many feared that pilots would soon be redundant. However, few people know that it takes almost 30 people to fly one military drone, and an additional 50 people to analyze and make sense of the data being streamed back by the drone.\n",
-        "The US army suffers from a serious shortage of trained, high quality drone pilots; anyone who masters this skill will have a job. But a social scientist warns that in 10 years, it is certain that computers will be flying that drone and humans will be redundant. Equally sure is that some brand new skill requirement will have opened up with advancing technology, calling for new talents.\n",
+        "An enduring fear of Luddites has always been that computers will rob humans of their \n",
+        "livelihood by taking their jobs and doing them more efficiently at lower cost. However, in \n",
+        "reality the exact opposite has happened. As computerised machines began taking over mechanical \n",
+        "and repetitive human activities, new jobs for people opened up that needs thinking and \n",
+        "analytical skills and judgement, or human interpersonal skills. A good example is the \n",
+        "worldwide proliferation of call centres. When drones were invented many feared that pilots \n",
+        "would soon be redundant. However, few people know that it takes almost 30 people to fly \n",
+        "one military drone, and an additional 50 people to analyze and make sense of the data being \n",
+        "streamed back by the drone. The US army suffers from a serious shortage of trained, high \n",
+        "quality drone pilots; anyone who masters this skill will have a job. But a social scientist \n",
+        "warns that in 10 years, it is certain that computers will be flying that drone and humans \n",
+        "will be redundant. Equally sure is that some brand new skill requirement will have opened \n",
+        "up with advancing technology, calling for new talents.\n",
         "\n",
-        "In the 20th century, a young man was supposed to choose a skill, vocation or profession, master it through education and practice, and then earn a living from it till he or she retired. However, the fast-changing nature of technology is making skills obsolete at a higher rate than ever before. To survive, tomorrow young man must keep re-inventing himself and updating his skills continuously. Life could be difficult if every new skill has a shelf life of only a decade or so.\n",
-        "Or perhaps one could look at it the other way — and say that changing technology will keep human beings on their toes throughout their life.\n",
+        "In the 20th century, a young man was supposed to choose a skill, vocation or profession, \n",
+        "master it through education and practice, and then earn a living from it till he or she \n",
+        "retired. However, the fast-changing nature of technology is making skills obsolete at a \n",
+        "higher rate than ever before. To survive, tomorrow young man must keep re-inventing himself \n",
+        "and updating his skills continuously. Life could be difficult if every new skill has a shelf \n",
+        "life of only a decade or so. Or perhaps one could look at it the other way — and say that \n",
+        "changing technology will keep human beings on their toes throughout their life.\n",
         "\n",
-        "Technology is the result of human inventiveness. It reflects our evolutionary heritage. We are neither strong like gorillas or tigers, nor fast like cheetahs and hawks, but our brains and thinking powers have given us the greatest edge of any species on the planet. Technology is a result.\n",
-        "Technology is either inherently good or bad; it is how we use it that makes it so. The splitting of a hydrogen atom is technology at work. As history has shown us, technology can equally be used to make a nuclear bomb that kills millions — or generate electricity that lights up a million homes.\n",
+        "Technology is the result of human inventiveness. It reflects our evolutionary heritage. We \n",
+        "are neither strong like gorillas or tigers, nor fast like cheetahs and hawks, but our \n",
+        "brains and thinking powers have given us the greatest edge of any species on the planet. \n",
+        "Technology is a result. Technology is either inherently good or bad; it is how we use it \n",
+        "that makes it so. The splitting of a hydrogen atom is technology at work. As history has \n",
+        "shown us, technology can equally be used to make a nuclear bomb that kills millions — or \n",
+        "generate electricity that lights up a million homes.\n",
         "\"\"\""
       ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "essay_paragraphs = essay.split('\\n\\n')"
-      ],
+      "execution_count": 3,
       "metadata": {
         "id": "JESY8Y10W6hQ"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "essay_paragraphs = essay.split('\\n\\n')"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "id": "t1G-ZiHbZZ-Y"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "2023-01-01 12:18:07.154473: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
+            "2023-01-01 12:18:07.154505: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
+          ]
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "c0472f74fc4248f9811b163d5487e707",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/1.39k [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "application/vnd.jupyter.widget-view+json": {
+              "model_id": "65a9c842713046e680dad6e660f672f8",
+              "version_major": 2,
+              "version_minor": 0
+            },
+            "text/plain": [
+              "Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        }
+      ],
       "source": [
         "model_name = \"snrspeaks/t5-one-line-summary\"\n",
         "\n",
         "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
         "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n",
         "tokenizer = AutoTokenizer.from_pretrained(model_name)"
-      ],
-      "metadata": {
-        "id": "t1G-ZiHbZZ-Y"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "8BARyupEemZ-"
+      },
       "source": [
         "## Results\n",
         "Please at least check what is generated here, it's usually good but sometimes it's bs"
-      ],
-      "metadata": {
-        "id": "8BARyupEemZ-"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "preds = []\n",
-        "\n",
-        "for i in range(0, len(essay_paragraphs)):\n",
-        "  input_ids = tokenizer.encode(essay_paragraphs[i], return_tensors=\"pt\", add_special_tokens=True)\n",
-        "  generated_ids = model.generate(input_ids=input_ids,num_beams=5,max_length=35,repetition_penalty=4.5,length_penalty=1.5,early_stopping=True,num_return_sequences=1)\n",
-        "  preds.append(tokenizer.decode(generated_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))\n",
-        "\n",
-        "print('Write an intro paragraph to an essay called', preds[0].lower())\n",
-        "\n",
-        "for i in range(1, len(preds) - 1):\n",
-        "  print('Write a paragraph to an essay about', preds[i].lower())\n",
-        "\n",
-        "print('Write a concluding paragraph about', preds[len(preds) - 1].lower())"
-      ],
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -137,11 +203,10 @@
         "id": "eyR58KFRae7n",
         "outputId": "b8e4bc29-be89-43c3-d1bc-7e90525c0e09"
       },
-      "execution_count": null,
       "outputs": [
         {
-          "output_type": "stream",
           "name": "stdout",
+          "output_type": "stream",
           "text": [
             "Write an intro paragraph to an essay called the rise and fall of technology\n",
             "Write a paragraph to an essay about technology that drives modern autonomy, hybrid cars, hover-taxis and space cabs\n",
@@ -154,7 +219,61 @@
             "Write a concluding paragraph about human inventiveness and technology\n"
           ]
         }
+      ],
+      "source": [
+        "preds = []\n",
+        "\n",
+        "for para in essay_paragraphs:\n",
+        "  input_ids = tokenizer.encode(para, return_tensors=\"pt\", add_special_tokens=True)\n",
+        "  generated_ids = model.generate(input_ids=input_ids,\n",
+        "                                 num_beams=5,\n",
+        "                                 max_length=35,\n",
+        "                                 repetition_penalty=4.5,\n",
+        "                                 length_penalty=1.5,\n",
+        "                                 early_stopping=True,\n",
+        "                                 num_return_sequences=1)\n",
+        "  preds.append(tokenizer.decode(generated_ids[0], \n",
+        "                                skip_special_tokens=True, \n",
+        "                                clean_up_tokenization_spaces=True))\n",
+        "\n",
+        "prompts = ['Write an intro paragraph to an essay called'] + \\\n",
+        "          ['Write a paragraph to an essay about']*len(preds[1:-1]) + \\\n",
+        "          ['Write a concluding paragraph about']\n",
+        "\n",
+        "assert len(preds) == len(prompts)\n",
+        "\n",
+        "for prompt, pred in zip(prompts, preds):\n",
+        "  print(prompt, pred.lower())"
       ]
     }
-  ]
-}
\ No newline at end of file
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3.8.10 64-bit",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.10"
+    },
+    "vscode": {
+      "interpreter": {
+        "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

From 0f7aa6e02a0de654ea3889481bcb693deed8f968 Mon Sep 17 00:00:00 2001
From: Alexander Goryunov <alex.goryunov@gmail.com>
Date: Sun, 1 Jan 2023 12:57:01 +0200
Subject: [PATCH 23/53] Removed cell outputs

---
 .../EssayInstructions.ipynb                   | 59 +------------------
 1 file changed, 3 insertions(+), 56 deletions(-)

diff --git a/notebooks/data-argumentation/EssayInstructions.ipynb b/notebooks/data-argumentation/EssayInstructions.ipynb
index d267ef03..b81a8b09 100644
--- a/notebooks/data-argumentation/EssayInstructions.ipynb
+++ b/notebooks/data-argumentation/EssayInstructions.ipynb
@@ -133,48 +133,11 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 4,
+      "execution_count": null,
       "metadata": {
         "id": "t1G-ZiHbZZ-Y"
       },
-      "outputs": [
-        {
-          "name": "stderr",
-          "output_type": "stream",
-          "text": [
-            "2023-01-01 12:18:07.154473: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n",
-            "2023-01-01 12:18:07.154505: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n"
-          ]
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "c0472f74fc4248f9811b163d5487e707",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/1.39k [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        },
-        {
-          "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "65a9c842713046e680dad6e660f672f8",
-              "version_major": 2,
-              "version_minor": 0
-            },
-            "text/plain": [
-              "Downloading:   0%|          | 0.00/892M [00:00<?, ?B/s]"
-            ]
-          },
-          "metadata": {},
-          "output_type": "display_data"
-        }
-      ],
+      "outputs": [],
       "source": [
         "model_name = \"snrspeaks/t5-one-line-summary\"\n",
         "\n",
@@ -203,23 +166,7 @@
         "id": "eyR58KFRae7n",
         "outputId": "b8e4bc29-be89-43c3-d1bc-7e90525c0e09"
       },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Write an intro paragraph to an essay called the rise and fall of technology\n",
-            "Write a paragraph to an essay about technology that drives modern autonomy, hybrid cars, hover-taxis and space cabs\n",
-            "Write a paragraph to an essay about luddites: why technology is a blessing?\n",
-            "Write a paragraph to an essay about artificial emotional intelligence\n",
-            "Write a paragraph to an essay about how could that be a negative?\n",
-            "Write a paragraph to an essay about detecting and manipulating human emotions\n",
-            "Write a paragraph to an essay about the rise and fall of human-client skills\n",
-            "Write a paragraph to an essay about changing technology will keep human beings on their toes throughout their life\n",
-            "Write a concluding paragraph about human inventiveness and technology\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "preds = []\n",
         "\n",

From 5a18fba9e6e80f69e0efe88db6dd268463c4c5d9 Mon Sep 17 00:00:00 2001
From: Graeme Harris <harris.graemeza@gmail.com>
Date: Sat, 31 Dec 2022 01:48:37 +0200
Subject: [PATCH 24/53] Added basic redis + redis insights config for docker
 and ansible

---
 ansible/dev.yaml    | 18 ++++++++++++++++++
 docker-compose.yaml | 27 +++++++++++++++++++++++++++
 redis.conf          |  2 ++
 3 files changed, 47 insertions(+)
 create mode 100644 redis.conf

diff --git a/ansible/dev.yaml b/ansible/dev.yaml
index d022ba3c..53fdc611 100644
--- a/ansible/dev.yaml
+++ b/ansible/dev.yaml
@@ -32,6 +32,24 @@
         - name: oasst-postgres
         - name: oasst-postgres-web
 
+    - name: Setup redis
+      community.docker.docker_container:
+        name: "{{ item.name }}"
+        image: redis
+        state: started
+        restart_policy: always
+        network_mode: oasst
+        volumes:
+          - redis:/data
+        healthcheck:
+          test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
+          interval: 2s
+          timeout: 2s
+          retries: 10
+        ports:
+          - 6379:6379
+        command: redis-server /usr/local/etc/redis/redis.conf
+
     - name: Set up maildev
       community.docker.docker_container:
         name: oasst-maildev
diff --git a/docker-compose.yaml b/docker-compose.yaml
index d329c780..a5b90498 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -27,6 +27,29 @@ services:
       timeout: 2s
       retries: 10
 
+  # Redis - caching + rate limiting on BE
+  redis:
+    image: redis
+    restart: always
+    ports:
+      - 6379:6379
+    healthcheck:
+      test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
+      interval: 2s
+      timeout: 2s
+      retries : 10
+    command: redis-server /usr/local/etc/redis/redis.conf
+    volumes:
+      - ./redis.conf:/usr/local/etc/redis/redis.conf
+      - redis:/data
+  # insights host - redis:6379
+  redis-insights:
+    image: redislabs/redisinsight:latest
+    ports:
+      - 8001:8001
+    volumes:
+      - redis:/data
+
   # This DB is for Web Authentication and data caching.
   webdb:
     image: postgres
@@ -100,3 +123,7 @@ services:
     ports:
       - "3000:3000"
     command: bash wait-for-postgres.sh node server.js
+
+volumes:
+  redis:
+    driver: local
diff --git a/redis.conf b/redis.conf
new file mode 100644
index 00000000..c06efe47
--- /dev/null
+++ b/redis.conf
@@ -0,0 +1,2 @@
+maxmemory 100mb
+maxmemory-policy allkeys-lru
\ No newline at end of file

From 743de0bb18da1f06061630fbcf4eb9cdaa781b90 Mon Sep 17 00:00:00 2001
From: Graeme Harris <harris.graemeza@gmail.com>
Date: Sat, 31 Dec 2022 01:49:34 +0200
Subject: [PATCH 25/53] Linting + prettier

---
 docker-compose.yaml | 2 +-
 redis.conf          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index a5b90498..27f5d741 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -37,7 +37,7 @@ services:
       test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
       interval: 2s
       timeout: 2s
-      retries : 10
+      retries: 10
     command: redis-server /usr/local/etc/redis/redis.conf
     volumes:
       - ./redis.conf:/usr/local/etc/redis/redis.conf
diff --git a/redis.conf b/redis.conf
index c06efe47..58da1e05 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1,2 +1,2 @@
 maxmemory 100mb
-maxmemory-policy allkeys-lru
\ No newline at end of file
+maxmemory-policy allkeys-lru

From 3bb0b04f92c8040ac26b5aebf061d9b911245af6 Mon Sep 17 00:00:00 2001
From: Graeme Harris <harris.graemeza@gmail.com>
Date: Sat, 31 Dec 2022 12:56:09 +0200
Subject: [PATCH 26/53] Removed local volume from redis image

---
 docker-compose.yaml | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 27f5d741..ed72c820 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -41,14 +41,11 @@ services:
     command: redis-server /usr/local/etc/redis/redis.conf
     volumes:
       - ./redis.conf:/usr/local/etc/redis/redis.conf
-      - redis:/data
   # insights host - redis:6379
   redis-insights:
     image: redislabs/redisinsight:latest
     ports:
       - 8001:8001
-    volumes:
-      - redis:/data
 
   # This DB is for Web Authentication and data caching.
   webdb:
@@ -123,7 +120,3 @@ services:
     ports:
       - "3000:3000"
     command: bash wait-for-postgres.sh node server.js
-
-volumes:
-  redis:
-    driver: local

From 1ddd9155f91ae8801b4c4567ffb7bc1131a8dc0a Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 11:35:49 +0000
Subject: [PATCH 27/53] [fix] remove vscode settings

---
 .vscode/settings.json | 4 ----
 1 file changed, 4 deletions(-)
 delete mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index 4c58a32f..00000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
-  "python.formatting.provider": "autopep8",
-  "python.analysis.extraPaths": ["${workspaceFolder}/oasst-shared"]
-}

From 10724411eb139204a7bf435df1312fd5fc16fde3 Mon Sep 17 00:00:00 2001
From: Graeme Harris <harris.graemeza@gmail.com>
Date: Sun, 1 Jan 2023 13:37:01 +0200
Subject: [PATCH 28/53] Removed redis from ansible for now

---
 ansible/dev.yaml | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/ansible/dev.yaml b/ansible/dev.yaml
index 53fdc611..d022ba3c 100644
--- a/ansible/dev.yaml
+++ b/ansible/dev.yaml
@@ -32,24 +32,6 @@
         - name: oasst-postgres
         - name: oasst-postgres-web
 
-    - name: Setup redis
-      community.docker.docker_container:
-        name: "{{ item.name }}"
-        image: redis
-        state: started
-        restart_policy: always
-        network_mode: oasst
-        volumes:
-          - redis:/data
-        healthcheck:
-          test: ["CMD-SHELL", "redis-cli ping | grep PONG"]
-          interval: 2s
-          timeout: 2s
-          retries: 10
-        ports:
-          - 6379:6379
-        command: redis-server /usr/local/etc/redis/redis.conf
-
     - name: Set up maildev
       community.docker.docker_container:
         name: oasst-maildev

From fe99b46f2e02f48a55165684a61fafa4dc5c823a Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 11:43:15 +0000
Subject: [PATCH 29/53] [fix] pre-commit update

---
 model/reward/instructor/README.md             |  11 +-
 model/reward/instructor/TODO.md               |  18 +--
 model/reward/instructor/cls_dataset.py        |  37 +++--
 .../configs/bloomz-560m-summary.yml           |   2 +-
 .../reward/instructor/configs/bloomz-560m.yml |   2 +-
 .../configs/electra-base-dis-webgpt.yml       |   2 +-
 .../instructor/configs/galactica-125m.yml     |   2 +-
 .../instructor/configs/galactica-1b.yml       |   2 +-
 .../test-galactica-125m-classification.yml    |   2 +-
 .../reward/instructor/experimental_dataset.py |  50 +++---
 model/reward/instructor/rank_datasets.py      | 104 ++++++------
 .../instructor/summary_quality_trainer.py     | 140 ++++++++++-------
 model/reward/instructor/tests/test_dataset.py |  27 ++--
 model/reward/instructor/trainer.py            | 148 +++++++++++-------
 model/reward/instructor/utils.py              |  84 +++++-----
 15 files changed, 337 insertions(+), 294 deletions(-)

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 31c25371..73a872a0 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -2,7 +2,6 @@
 
 Trainer code based on huggingface. Compatible with deepspeed or accelerate
 
-
 Requirements
 
 ```
@@ -15,12 +14,10 @@ torch==1.12
 
 Start training reward model
 
-
 ```bash
 python trainer.py configs/electra-base-dis-webgpt.yml
 ```
 
-
 Additional axis labeling, this outputs a 4 summary quality evaluation metrics (score are normalized to 0-1 )
 
 ```bash
@@ -29,13 +26,13 @@ python summary_quality_trainer.py configs/test-bloomz-560m-quality.yml
 
 The four summary are :
 
-* overall
+- overall
 
-* accuracy
+- accuracy
 
-* coverage
+- coverage
 
-* coherence
+- coherence
 
 ## Dataset
 
diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index 1e653922..ed33b3c0 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -1,23 +1,19 @@
-
 Some other reward features we can use
 
-0. Finish classifcation feature 
+0. Finish classification feature
 
 1. Summaries from human feedback
 
-* use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
+- use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
 
-* each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
+- each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
 
-* ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)~~
-
-    * this should be placed under experimental_dataset.py
+- ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an additional model (rank additional aspects of the policy model)~~
 
+  - this should be placed under experimental_dataset.py
 
 2. Add support for anthropic dataset
 
-* anthropic dataset is more like a conversation tree which is much complex than simply question-answer schema
-
-    * this is basically a MCTS from alphazero.
-
+- anthropic dataset is more like a conversation tree, which is much more complex than a simple question-answer schema
 
+  - this is basically a MCTS from alphazero.
diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py
index ff824d19..09aa821b 100644
--- a/model/reward/instructor/cls_dataset.py
+++ b/model/reward/instructor/cls_dataset.py
@@ -1,32 +1,34 @@
-'''
+# -*- coding: utf-8 -*-
+"""
 
     classification based ranking
 
-'''
-import os
+"""
 import json
+import os
 import random
-import torch
-import numpy as np
+
 from dataset import load_dataset
 from torch.utils.data import Dataset
+
 from .utils import webgpt_return_format
 
+
 class WebGPTDataset(Dataset):
-    def __init__(self, mode='train', index_cache='dataset/webgpt_train_idx.pt', additional_dataset=None) -> None:
+    def __init__(self, mode="train", index_cache="dataset/webgpt_train_idx.pt", additional_dataset=None) -> None:
         super().__init__()
-        '''
+        """
             mode : train or val, used for validation purpose, has nothing to do with original split
             additional_dataset : a list of jsonline format with idx, question and texts (generate candidates)
                 idx : must match the index you iterate from comparison enumerate order
                 question : for validation purpose
                 texts : list of K generate results from the question prompt
-        '''
-        os.makedirs('dataset', exist_ok=True)
+        """
+        os.makedirs("dataset", exist_ok=True)
         dataset = load_dataset("openai/webgpt_comparisons")
         self.dataset = []
         self.dataset_index = []
-        for idx, row in enumerate(dataset['train']):
+        for idx, row in enumerate(dataset["train"]):
             self.dataset.append(webgpt_return_format(row))
 
         # since this dataset was generated from 176B GPT-3
@@ -36,17 +38,17 @@ class WebGPTDataset(Dataset):
         if additional_dataset is not None:
             self.sample_additional = True
             self.additional = {}
-            with open(additional_dataset, 'r') as f:
+            with open(additional_dataset, "r") as f:
                 for line in f:
                     row = json.loads(line)
-                    if row['idx'] in self.dataset_index:
-                        self.additional[row['idx']] = row['negatives']
+                    if row["idx"] in self.dataset_index:
+                        self.additional[row["idx"]] = row["negatives"]
             if len(self.additional) != len(self.dataset_index):
                 for match_idx in self.dataset_index:
                     if match_idx in self.additional:
                         continue
 
-                    idx = match_idx-900
+                    idx = match_idx - 900
                     while idx not in self.additional:
                         idx -= 1
                     self.additional[match_idx] = self.additional[idx]
@@ -57,10 +59,7 @@ class WebGPTDataset(Dataset):
     def __getitem__(self, index):
         row = self.dataset[index]
         if not self.sample_additional:
-            return row['question'], row['pos'], row['neg']
+            return row["question"], row["pos"], row["neg"]
 
         gen_neg = random.choice(self.additional[self.dataset_index[index]])
-        return row['question'], row['pos'], row['neg'], gen_neg
-
-
-
+        return row["question"], row["pos"], row["neg"], gen_neg
diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml
index a02f4e4a..55ed6cd1 100644
--- a/model/reward/instructor/configs/bloomz-560m-summary.yml
+++ b/model/reward/instructor/configs/bloomz-560m-summary.yml
@@ -6,4 +6,4 @@ max_length: 600
 freeze_layer: 12
 num_train_epochs: 2
 datasets:
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml
index c8f55746..bf3f14dd 100644
--- a/model/reward/instructor/configs/bloomz-560m.yml
+++ b/model/reward/instructor/configs/bloomz-560m.yml
@@ -7,4 +7,4 @@ freeze_layer: 12
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
index fc168b63..89200fe1 100644
--- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml
+++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
@@ -1,3 +1,3 @@
 model_name: google/electra-large-discriminator
 learning_rate: 3e-5
-max_length: 300
\ No newline at end of file
+max_length: 300
diff --git a/model/reward/instructor/configs/galactica-125m.yml b/model/reward/instructor/configs/galactica-125m.yml
index 55e093f5..13dbdfbe 100644
--- a/model/reward/instructor/configs/galactica-125m.yml
+++ b/model/reward/instructor/configs/galactica-125m.yml
@@ -10,4 +10,4 @@ max_length: 512
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml
index 5a094520..8ffd74e9 100644
--- a/model/reward/instructor/configs/galactica-1b.yml
+++ b/model/reward/instructor/configs/galactica-1b.yml
@@ -11,4 +11,4 @@ max_length: 400
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/test-galactica-125m-classification.yml b/model/reward/instructor/configs/test-galactica-125m-classification.yml
index 1ad1f47c..e36efcf3 100644
--- a/model/reward/instructor/configs/test-galactica-125m-classification.yml
+++ b/model/reward/instructor/configs/test-galactica-125m-classification.yml
@@ -11,4 +11,4 @@ max_length: 128
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index 47d20d64..28f62967 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -1,4 +1,5 @@
-'''
+# -*- coding: utf-8 -*-
+"""
     HFSummary
 
         I want to train a multi regression model on axis_evals dataset mainly we can estimate the score of these score
@@ -7,15 +8,16 @@
 
         Should be better than just a preference score
 
-'''
-import torch
-from typing import Optional, Union
-import numpy as np
+"""
 from collections import defaultdict
-from datasets import load_dataset
 from dataclasses import dataclass
+from typing import Optional, Union
+
+import numpy as np
+import torch
+from datasets import load_dataset
 from torch.utils.data import Dataset
-from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase
 
 
 @dataclass
@@ -25,12 +27,13 @@ class DataCollatorForSummaryScore:
     Data collator that will dynamically pad the inputs for multiple choice received.
 
     """
+
     tokenizer: PreTrainedTokenizerBase
     num_choices: int = 2
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False # galactica
+    drop_token_type: bool = False  # galactica
 
     def __call__(self, batch):
 
@@ -48,17 +51,17 @@ class DataCollatorForSummaryScore:
             return_tensors="pt",
         )
         if self.drop_token_type:
-            batch_feature.pop('token_type_ids')
+            batch_feature.pop("token_type_ids")
         # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
-        batch_feature['labels'] = torch.from_numpy(np.array(labels)).float()
+        batch_feature["labels"] = torch.from_numpy(np.array(labels)).float()
         return batch_feature
 
 
 class HFSummaryQuality(Dataset):
     def __init__(self, split, tokenizer, max_length=300) -> None:
         super().__init__()
-        assert split in ('validation', 'test')
-        dataset = load_dataset('Tristan/summarize_from_feedback', 'axis')[split]
+        assert split in ("validation", "test")
+        dataset = load_dataset("Tristan/summarize_from_feedback", "axis")[split]
         self.max_length = max_length
         mean_scores = defaultdict(list)
         self.contexts = []
@@ -66,22 +69,21 @@ class HFSummaryQuality(Dataset):
         self.labels = []
         for data in dataset:
 
-            if 'article' in data['info'] and \
-                data['info']['article'] is not None:
-                context = data['info']['article']
-            elif 'post' in data['info']:
-                context = data['info']['post']
+            if "article" in data["info"] and data["info"]["article"] is not None:
+                context = data["info"]["article"]
+            elif "post" in data["info"]:
+                context = data["info"]["post"]
             self.contexts.append(context)
 
-            response = data['summary']['text']
+            response = data["summary"]["text"]
             self.responses.append(response)
-            self.labels.append(data['summary']['axes'])
-            for axis, score in data['summary']['axes'].items():
+            self.labels.append(data["summary"]["axes"])
+            for axis, score in data["summary"]["axes"].items():
                 if score is not None:
                     mean_scores[axis].append(score)
 
-        self.label2idx = { key: idx for idx, key in enumerate(mean_scores.keys()) }
-        self.label2mean = { key: np.mean(scores) for key, scores in mean_scores.items() }
+        self.label2idx = {key: idx for idx, key in enumerate(mean_scores.keys())}
+        self.label2mean = {key: np.mean(scores) for key, scores in mean_scores.items()}
         self.tokenizer = tokenizer
         print(self.label2idx)
 
@@ -94,7 +96,5 @@ class HFSummaryQuality(Dataset):
         response = self.responses[index]
         labels = np.zeros(len(self.label2idx))
         for key, score in self.labels[index].items():
-            labels[self.label2idx[key]] = (self.label2mean[key] if score is None else score)/10
+            labels[self.label2idx[key]] = (self.label2mean[key] if score is None else score) / 10
         return self.tokenizer(context, response, truncation=True, max_length=self.max_length), labels
-
-
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index f38885e4..99ba9955 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -1,4 +1,5 @@
-'''
+# -*- coding: utf-8 -*-
+"""
     author: theblackcat102
 
     Dataset output format from __getitem__
@@ -17,13 +18,15 @@
         inferior than the human perference one
 
 
-'''
-from typing import Optional, Union
+"""
 from dataclasses import dataclass
+from typing import Optional, Union
+
 import numpy as np
-from torch.utils.data import Dataset
 from datasets import load_dataset
-from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+from torch.utils.data import Dataset
+from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase
+
 
 @dataclass
 class DataCollatorForPairRank:
@@ -32,12 +35,13 @@ class DataCollatorForPairRank:
     Data collator that will dynamically pad the inputs for multiple choice received.
 
     """
+
     tokenizer: PreTrainedTokenizerBase
     num_choices: int = 2
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False # galactica
+    drop_token_type: bool = False  # galactica
 
     def __call__(self, features):
 
@@ -45,12 +49,10 @@ class DataCollatorForPairRank:
         batch_size = 0
         for question, pairs in features:
             for (pos, neg) in pairs:
-                flatten_features.append(self.tokenizer(question, pos,
-                    truncation=True, max_length=self.max_length))
-                flatten_features.append(self.tokenizer(question, neg,
-                    truncation=True, max_length=self.max_length))
+                flatten_features.append(self.tokenizer(question, pos, truncation=True, max_length=self.max_length))
+                flatten_features.append(self.tokenizer(question, neg, truncation=True, max_length=self.max_length))
                 batch_size += 1
-        
+
         batch = self.tokenizer.pad(
             flatten_features,
             padding=self.padding,
@@ -59,13 +61,12 @@ class DataCollatorForPairRank:
             return_tensors="pt",
         )
         if self.drop_token_type:
-            batch.pop('token_type_ids')
+            batch.pop("token_type_ids")
         # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
         return batch
 
 
 class WebGPT(Dataset):
-
     def __init__(self) -> None:
         super().__init__()
 
@@ -74,23 +75,19 @@ class WebGPT(Dataset):
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2question = {}
-        for row in dataset['train']:
-            question = row['question']['full_text']
+        for row in dataset["train"]:
+            question = row["question"]["full_text"]
             if question not in self.index2question:
                 self.index2question[len(self.index2question)] = question
 
             if question not in questions:
                 questions[question] = []
 
-            if row['score_0'] > row['score_1']:
+            if row["score_0"] > row["score_1"]:
                 # not going to risk it
-                questions[question].append((
-                    row['answer_0'], row['answer_1']
-                ))
+                questions[question].append((row["answer_0"], row["answer_1"]))
             else:
-                questions[question].append((
-                    row['answer_1'], row['answer_0']
-                ))
+                questions[question].append((row["answer_1"], row["answer_0"]))
 
         self.questions = questions
 
@@ -104,61 +101,55 @@ class WebGPT(Dataset):
         return question, rows
 
 
-
-
 class HFSummary(Dataset):
-    '''
-        Human feedback data from OpenAI
-        https://github.com/openai/summarize-from-feedback
-        
-        labeling method : pair comparison, 0 or 1
+    """
+    Human feedback data from OpenAI
+    https://github.com/openai/summarize-from-feedback
 
-    '''
-    def __init__(self, split='train',
-        conf_threshold=-1,
-        max_comparison_per_sample=3) -> None:
+    labeling method : pair comparison, 0 or 1
+
+    """
+
+    def __init__(self, split="train", conf_threshold=-1, max_comparison_per_sample=3) -> None:
         super().__init__()
-        assert split in ('train', 'valid1', 'valid2', 'test')
+        assert split in ("train", "valid1", "valid2", "test")
         summaries = {}
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2summary = {}
         self.max_comparison_per_sample = max_comparison_per_sample
-        major_split = split if 'train' == split else 'validation'
-        dataset = load_dataset('Tristan/summarize_from_feedback', 'comparisons')[major_split]
+        major_split = split if "train" == split else "validation"
+        dataset = load_dataset("Tristan/summarize_from_feedback", "comparisons")[major_split]
         for data in dataset:
-            if 'extra' in data and \
-                'confidence' in data['extra'] and \
-                data['extra']['confidence'] is not None and \
-                conf_threshold > data['extra']['confidence']:
-                print('skipping {}'.format(data['info']['id']))
+            if (
+                "extra" in data
+                and "confidence" in data["extra"]
+                and data["extra"]["confidence"] is not None
+                and conf_threshold > data["extra"]["confidence"]
+            ):
+                print("skipping {}".format(data["info"]["id"]))
                 continue
 
-            if split != 'train' and split != data['split']:
+            if split != "train" and split != data["split"]:
                 continue
 
-            if 'article' in data['info'] and \
-                data['info']['article'] is not None:
-                context = data['info']['article']
-            elif 'post' in data['info']:
-                context = data['info']['post']
-
+            if "article" in data["info"] and data["info"]["article"] is not None:
+                context = data["info"]["article"]
+            elif "post" in data["info"]:
+                context = data["info"]["post"]
 
             if context not in self.index2summary:
                 self.index2summary[len(self.index2summary)] = context
-            
+
             if context not in summaries:
                 summaries[context] = []
 
-            pos, neg = (0, 1) if data['choice'] == 0 else (1, 0)
-            summaries[context].append((
-                data['summaries'][pos]['text'],
-                data['summaries'][neg]['text']
-            ))
+            pos, neg = (0, 1) if data["choice"] == 0 else (1, 0)
+            summaries[context].append((data["summaries"][pos]["text"], data["summaries"][neg]["text"]))
 
         self.summaries = summaries
 
-        self.postfix_prompt = ' TLDR;'
+        self.postfix_prompt = " TLDR;"
 
     def __len__(self):
         return len(self.index2summary)
@@ -172,5 +163,4 @@ class HFSummary(Dataset):
         # not optimal but good for now
         valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample)
         # optimize the format later
-        return context+self.postfix_prompt, [ r for idx, r in enumerate(rows) if idx in valid_idx ]
-
+        return context + self.postfix_prompt, [r for idx, r in enumerate(rows) if idx in valid_idx]
diff --git a/model/reward/instructor/summary_quality_trainer.py b/model/reward/instructor/summary_quality_trainer.py
index a6604819..88bf1abf 100644
--- a/model/reward/instructor/summary_quality_trainer.py
+++ b/model/reward/instructor/summary_quality_trainer.py
@@ -1,46 +1,72 @@
+# -*- coding: utf-8 -*-
 import os
-os.environ['WANDB_PROJECT'] = 'quality-scoring'
-import torch
-import yaml
-import evaluate
-from typing import Any, Callable, List, Optional, Tuple, Union, Dict
-from torch import nn
 from argparse import ArgumentParser
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import evaluate
 import numpy as np
+import torch
+from experimental_dataset import DataCollatorForSummaryScore, HFSummaryQuality
+from torch import nn
 from torch.utils.data import Dataset
-from transformers import AutoModelForSequenceClassification
-from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
-from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
-from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
+from transformers import (
+    AutoModelForSequenceClassification,
+    DataCollator,
+    EvalPrediction,
+    PreTrainedModel,
+    PreTrainedTokenizerBase,
+    Trainer,
+    TrainerCallback,
+    TrainingArguments,
+)
+from utils import argument_parsing, freeze_top_n_layers, get_tokenizer
+
+os.environ["WANDB_PROJECT"] = "quality-scoring"
 
 parser = ArgumentParser()
-parser.add_argument('config', type=str)
+parser.add_argument("config", type=str)
 
 accuracy = evaluate.load("mse")
+
+
 def compute_metrics(eval_pred):
     predictions, labels = eval_pred
     return accuracy.compute(predictions=predictions.flatten(), references=labels.flatten())
 
 
 class QualityTrainer(Trainer):
-    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
-                 args: TrainingArguments = None,
-                 data_collator: Optional[DataCollator] = None,
-                 train_dataset: Optional[Dataset] = None,
-                 eval_dataset: Optional[Dataset] = None,
-                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
-                 model_init: Callable[[], PreTrainedModel] = None,
-                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-                 callbacks: Optional[List[TrainerCallback]] = None,
-                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
-                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
-        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
-                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
+    def __init__(
+        self,
+        model: Union[PreTrainedModel, nn.Module] = None,
+        args: TrainingArguments = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Dataset] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        model_init: Callable[[], PreTrainedModel] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+        callbacks: Optional[List[TrainerCallback]] = None,
+        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+    ):
+        super().__init__(
+            model,
+            args,
+            data_collator,
+            train_dataset,
+            eval_dataset,
+            tokenizer,
+            model_init,
+            compute_metrics,
+            callbacks,
+            optimizers,
+            preprocess_logits_for_metrics,
+        )
         self.loss_fct = nn.L1Loss()
         self.sigmoid = nn.Sigmoid()
 
     def compute_loss(self, model, inputs, return_outputs=False):
-        labels = inputs.pop('labels')
+        labels = inputs.pop("labels")
         # forward pass
         outputs = model(**inputs)
         logits = self.sigmoid(outputs.get("logits"))
@@ -50,75 +76,73 @@ class QualityTrainer(Trainer):
 
     def _compute_loss(self, model, inputs):
         inputs = self._prepare_inputs(inputs)
-        labels = inputs.pop('labels')
+        labels = inputs.pop("labels")
         outputs = model(**inputs)
         logits = self.sigmoid(outputs.get("logits"))
         loss = self.loss_fct(logits, labels)
 
         return loss, logits
 
-    def prediction_step(self, model: nn.Module,
-            inputs: Dict[str, Union[torch.Tensor, Any]],
-            prediction_loss_only: bool,
-            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(
+        self,
+        model: nn.Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[List[str]] = None,
+    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
 
         with torch.no_grad():
             # compute loss on predict data
             loss, logits = self._compute_loss(model, inputs)
 
         loss = loss.mean().detach()
-        labels = inputs['labels']
+        labels = inputs["labels"]
         if self.args.prediction_loss_only:
             return (loss, None, None)
 
         return (loss, logits, labels)
 
+
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
 
-    model_name = training_conf['model_name']
+    model_name = training_conf["model_name"]
     tokenizer = get_tokenizer(model_name)
-    collate_fn = DataCollatorForSummaryScore(tokenizer,
-        max_length=training_conf['max_length'],
-        drop_token_type= 'galactica' in model_name
+    collate_fn = DataCollatorForSummaryScore(
+        tokenizer, max_length=training_conf["max_length"], drop_token_type="galactica" in model_name
+    )
+    train = HFSummaryQuality(split="validation", tokenizer=tokenizer, max_length=training_conf["max_length"])
+    eval = HFSummaryQuality(split="test", tokenizer=tokenizer, max_length=training_conf["max_length"])
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_name, num_labels=len(train.label2idx), problem_type="regression"
     )
-    train = HFSummaryQuality(split='validation',
-        tokenizer=tokenizer,
-        max_length=training_conf['max_length']
-        )
-    eval = HFSummaryQuality(split='test',
-            tokenizer=tokenizer,
-            max_length=training_conf['max_length']
-        )
-    model = AutoModelForSequenceClassification.from_pretrained(model_name,
-        num_labels=len(train.label2idx), problem_type='regression')
 
-    if 'freeze_layer' in training_conf:
-        num_layer = training_conf['freeze_layer']
+    if "freeze_layer" in training_conf:
+        num_layer = training_conf["freeze_layer"]
         model = freeze_top_n_layers(model, num_layer)
         model_parameters = filter(lambda p: p.requires_grad, model.parameters())
         params = sum([np.prod(p.size()) for p in model_parameters])
-        print('Number of trainable : {}M'.format(int(params/1e6)))
+        print("Number of trainable : {}M".format(int(params / 1e6)))
 
     args = TrainingArguments(
         output_dir=f"{model_name}-finetuned",
-        num_train_epochs=training_conf['num_train_epochs'],
+        num_train_epochs=training_conf["num_train_epochs"],
         warmup_steps=500,
-        learning_rate=training_conf['learning_rate'],
+        learning_rate=training_conf["learning_rate"],
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=training_conf['gradient_checkpointing'],
-        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
-        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
-        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
+        gradient_checkpointing=training_conf["gradient_checkpointing"],
+        gradient_accumulation_steps=training_conf["gradient_accumulation_steps"],
+        per_device_train_batch_size=training_conf["per_device_train_batch_size"],
+        per_device_eval_batch_size=training_conf["per_device_eval_batch_size"],
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
-        evaluation_strategy='steps',
-        eval_steps=training_conf['eval_steps'],
+        evaluation_strategy="steps",
+        eval_steps=training_conf["eval_steps"],
         save_steps=1000,
-        report_to='wandb'
+        report_to="wandb",
     )
     trainer = QualityTrainer(
         model,
@@ -127,6 +151,6 @@ if __name__ == "__main__":
         eval_dataset=eval,
         data_collator=collate_fn,
         tokenizer=tokenizer,
-        compute_metrics=compute_metrics
+        compute_metrics=compute_metrics,
     )
     trainer.train()
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index 271db83c..f367a50d 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -1,40 +1,41 @@
-from transformers import AutoTokenizer
+# -*- coding: utf-8 -*-
+from experimental_dataset import DataCollatorForSummaryScore, HFSummaryQuality
+from rank_datasets import DataCollatorForPairRank, HFSummary, WebGPT
 from torch.utils.data import DataLoader
-from rank_datasets import WebGPT, HFSummary, DataCollatorForPairRank
-from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
+from transformers import AutoTokenizer
+
 
 def test_hfsummary():
-    
+
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
-    dataset = HFSummary('train')
+    dataset = HFSummary("train")
     print(len(dataset))
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
     for batch in dataloader:
-        batch['input_ids'].shape
- 
+        batch["input_ids"].shape
+
 
 def test_webgpt():
-    
+
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
     dataset = WebGPT()
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:
-        print(batch['input_ids'].shape)
+        print(batch["input_ids"].shape)
 
 
 def test_hf_quality():
 
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForSummaryScore(tokenizer, max_length=200)
-    dataset = HFSummaryQuality('validation', tokenizer)
+    dataset = HFSummaryQuality("validation", tokenizer)
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:
-        print(batch['input_ids'].shape)
-
+        print(batch["input_ids"].shape)
 
 
 if __name__ == "__main__":
     test_hf_quality()
-    # test_webgpt()
\ No newline at end of file
+    # test_webgpt()
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index c8063cf7..0e98e4c5 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -1,32 +1,44 @@
+# -*- coding: utf-8 -*-
 import os
-os.environ['WANDB_PROJECT'] = 'reward-model'
-import torch
-import yaml
-import evaluate
-from typing import Any, Callable, List, Optional, Tuple, Union, Dict
-from torch import nn
 from argparse import ArgumentParser
-import numpy as np
 from dataclasses import dataclass
-from torch.utils.data import Dataset, ConcatDataset
-from transformers import AutoModelForSequenceClassification
-from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
-from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary
-from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import evaluate
+import numpy as np
+import torch
+from rank_datasets import DataCollatorForPairRank, HFSummary, WebGPT
+from torch import nn
+from torch.utils.data import ConcatDataset, Dataset
+from transformers import (
+    AutoModelForSequenceClassification,
+    DataCollator,
+    EvalPrediction,
+    PreTrainedModel,
+    PreTrainedTokenizerBase,
+    Trainer,
+    TrainerCallback,
+    TrainingArguments,
+)
+from utils import argument_parsing, freeze_top_n_layers, get_tokenizer, train_val_dataset
+
+os.environ["WANDB_PROJECT"] = "reward-model"
 
 accuracy = evaluate.load("accuracy")
 parser = ArgumentParser()
-parser.add_argument('config', type=str)
+parser.add_argument("config", type=str)
+
 
 @dataclass
 class CustomTrainingArguments(TrainingArguments):
-    loss_function: str='rank'
+    loss_function: str = "rank"
 
 
 def compute_metrics(eval_pred):
     predictions, _ = eval_pred
     predictions = np.argmax(predictions, axis=1)
-    return accuracy.compute(predictions=predictions, references=[0]*predictions.shape[0])
+    return accuracy.compute(predictions=predictions, references=[0] * predictions.shape[0])
+
 
 class RankLoss(nn.Module):
     def __init__(self, eps=1e-8) -> None:
@@ -39,27 +51,41 @@ class RankLoss(nn.Module):
 
 
 class RankTrainer(Trainer):
-    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
-                 args: TrainingArguments = None,
-                 data_collator: Optional[DataCollator] = None,
-                 train_dataset: Optional[Dataset] = None,
-                 eval_dataset: Optional[Dataset] = None,
-                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
-                 model_init: Callable[[], PreTrainedModel] = None,
-                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-                 callbacks: Optional[List[TrainerCallback]] = None,
-                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
-                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
-        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
-                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
-        self.loss_fct = RankLoss() if args.loss_function == 'rank' else nn.CrossEntropyLoss()
+    def __init__(
+        self,
+        model: Union[PreTrainedModel, nn.Module] = None,
+        args: TrainingArguments = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Dataset] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        model_init: Callable[[], PreTrainedModel] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+        callbacks: Optional[List[TrainerCallback]] = None,
+        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+    ):
+        super().__init__(
+            model,
+            args,
+            data_collator,
+            train_dataset,
+            eval_dataset,
+            tokenizer,
+            model_init,
+            compute_metrics,
+            callbacks,
+            optimizers,
+            preprocess_logits_for_metrics,
+        )
+        self.loss_fct = RankLoss() if args.loss_function == "rank" else nn.CrossEntropyLoss()
         self.loss_function = args.loss_function
 
     def compute_loss(self, model, inputs, return_outputs=False):
         # forward pass
         outputs = model(**inputs)
         logits = outputs.get("logits").view(-1, 2)
-        if self.loss_function == 'rank':
+        if self.loss_function == "rank":
             loss = self.loss_fct(logits[:, 0], logits[:, 1])
         else:
             loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
@@ -70,17 +96,20 @@ class RankTrainer(Trainer):
         inputs = self._prepare_inputs(inputs)
         outputs = model(**inputs)
         logits = outputs.get("logits").view(-1, 2)
-        if self.loss_function == 'rank':
+        if self.loss_function == "rank":
             loss = self.loss_fct(logits[:, 0], logits[:, 1])
         else:
             loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
 
         return loss, logits
 
-    def prediction_step(self, model: nn.Module,
-            inputs: Dict[str, Union[torch.Tensor, Any]],
-            prediction_loss_only: bool,
-            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(
+        self,
+        model: nn.Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[List[str]] = None,
+    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
 
         with torch.no_grad():
             # compute loss on predict data
@@ -93,54 +122,57 @@ class RankTrainer(Trainer):
 
         return (loss, logits, labels)
 
+
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
 
-    model_name = training_conf['model_name']
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
-    if 'freeze_layer' in training_conf:
-        num_layer = training_conf['freeze_layer']
+    model_name = training_conf["model_name"]
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type="regression")
+    if "freeze_layer" in training_conf:
+        num_layer = training_conf["freeze_layer"]
         model = freeze_top_n_layers(model, num_layer)
         model_parameters = filter(lambda p: p.requires_grad, model.parameters())
         params = sum([np.prod(p.size()) for p in model_parameters])
-        print('Number of trainable : {}M'.format(int(params/1e6)))
+        print("Number of trainable : {}M".format(int(params / 1e6)))
 
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
         output_dir=f"{model_name}-finetuned",
-        num_train_epochs=training_conf['num_train_epochs'],
+        num_train_epochs=training_conf["num_train_epochs"],
         warmup_steps=500,
-        loss_function=training_conf['loss'],
-        learning_rate=training_conf['learning_rate'],
+        loss_function=training_conf["loss"],
+        learning_rate=training_conf["learning_rate"],
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=training_conf['gradient_checkpointing'],
-        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
-        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
-        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
+        gradient_checkpointing=training_conf["gradient_checkpointing"],
+        gradient_accumulation_steps=training_conf["gradient_accumulation_steps"],
+        per_device_train_batch_size=training_conf["per_device_train_batch_size"],
+        per_device_eval_batch_size=training_conf["per_device_eval_batch_size"],
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
-        evaluation_strategy='steps',
-        eval_steps=training_conf['eval_steps'],
+        evaluation_strategy="steps",
+        eval_steps=training_conf["eval_steps"],
         save_steps=1000,
-        report_to='wandb'
+        report_to="wandb",
     )
     train_datasets, evals = [], {}
-    if 'webgpt' in training_conf['datasets']:
+    if "webgpt" in training_conf["datasets"]:
         web_dataset = WebGPT()
         train, eval = train_val_dataset(web_dataset)
         train_datasets.append(train)
-        evals['webgpt'] = eval
-    if 'hfsummary' in training_conf['datasets']:
-        sum_train = HFSummary(split='train')
+        evals["webgpt"] = eval
+    if "hfsummary" in training_conf["datasets"]:
+        sum_train = HFSummary(split="train")
         train_datasets.append(sum_train)
-        sum_eval = HFSummary(split='valid1')
+        sum_eval = HFSummary(split="valid1")
         assert len(sum_eval) > 0
-        evals['hfsummary'] = sum_eval
+        evals["hfsummary"] = sum_eval
     train = ConcatDataset(train_datasets)
-    collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name)
+    collate_fn = DataCollatorForPairRank(
+        tokenizer, max_length=training_conf["max_length"], drop_token_type="galactica" in model_name
+    )
     assert len(evals) > 0
     trainer = RankTrainer(
         model,
@@ -149,6 +181,6 @@ if __name__ == "__main__":
         eval_dataset=eval,
         data_collator=collate_fn,
         tokenizer=tokenizer,
-        compute_metrics=compute_metrics
+        compute_metrics=compute_metrics,
     )
     trainer.train()
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index d59bb13c..9441ddb9 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -1,96 +1,100 @@
+# -*- coding: utf-8 -*-
 import re
+
 import yaml
-from torch.utils.data import Subset
 from sklearn.model_selection import train_test_split
+from torch.utils.data import Subset
 from transformers import AutoTokenizer
 
-re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]')
+re_reference_remove = re.compile(r"\[([0-9])+\]|\[([0-9])+,([0-9])+\]")
+
 
 def webgpt_return_format(row):
-    if row['score_0'] >= row['score_1']:
+    if row["score_0"] >= row["score_1"]:
         # remove this to prevent information leak, since we are not using reference
         return {
-                'question': row['question']['full_text'],
-                     'pos': re_reference_remove.sub('', row['answer_0']),
-                     'neg': re_reference_remove.sub('', row['answer_1'])
-                }
+            "question": row["question"]["full_text"],
+            "pos": re_reference_remove.sub("", row["answer_0"]),
+            "neg": re_reference_remove.sub("", row["answer_1"]),
+        }
 
     return {
-            'question': row['question']['full_text'],
-                 'pos': re_reference_remove.sub('', row['answer_1']),
-                 'neg': re_reference_remove.sub('', row['answer_0'])
-            }
+        "question": row["question"]["full_text"],
+        "pos": re_reference_remove.sub("", row["answer_1"]),
+        "neg": re_reference_remove.sub("", row["answer_0"]),
+    }
 
 
 def get_tokenizer(tokenizer_name):
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
-    if 'galactica' in tokenizer_name:
-        tokenizer.add_special_tokens({'pad_token':'<pad>', 'eos_token': '</s>' })
+    if "galactica" in tokenizer_name:
+        tokenizer.add_special_tokens({"pad_token": "<pad>", "eos_token": "</s>"})
 
     return tokenizer
 
 
-
 def train_val_dataset(dataset, val_split=0.2):
-    train_idx, val_idx = train_test_split(list(range(len(dataset))), 
-        test_size=val_split, random_state=666, shuffle=True)
+    train_idx, val_idx = train_test_split(
+        list(range(len(dataset))), test_size=val_split, random_state=666, shuffle=True
+    )
     # [3879, 11479, 8341, 9177, 10798, 18177, 5735, 15669, 4837, 2760]
     print(val_idx[:10])
     # [13582, 5919, 11875, 7373, 19135, 13706, 8555, 15788, 15005, 15209]
     print(train_idx[:10])
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
+
 def freeze_top_n_layers(model, target_layers):
     # its possible we can simply detect which module is a ModuleList
     # and simply freeze the module without doing string parsing
     for name, param in model.named_parameters():
-        if 'embed' in name:
+        if "embed" in name:
             param.requires_grad = False
-        elif '.layer' in name or '.h.' in name:
-            tokens = name.split('.')
+        elif ".layer" in name or ".h." in name:
+            tokens = name.split(".")
             idx = 0
             for token in tokens:
-                if 'layer' in token or token == 'h':
+                if "layer" in token or token == "h":
                     break
                 idx += 1
             if idx >= len(tokens):
                 continue
 
-            layer_ = int(tokens[idx+1])
+            layer_ = int(tokens[idx + 1])
             if layer_ < target_layers:
                 # print('freeze ', layer_, name)
                 param.requires_grad = False
     return model
 
+
 def argument_parsing(parser):
     default_params = {
-        'num_train_epochs': 4,
-        'learning_rate': 3e-5,
-        'eval_steps': 500,
-        'loss': 'rank',
-        'max_length': 440,
-        'per_device_eval_batch_size': 5,
-        'per_device_train_batch_size': 8,
-        'gradient_accumulation_steps': 8,
-        'gradient_checkpointing': False,
-        'datasets': ['webgpt']
+        "num_train_epochs": 4,
+        "learning_rate": 3e-5,
+        "eval_steps": 500,
+        "loss": "rank",
+        "max_length": 440,
+        "per_device_eval_batch_size": 5,
+        "per_device_train_batch_size": 8,
+        "gradient_accumulation_steps": 8,
+        "gradient_checkpointing": False,
+        "datasets": ["webgpt"],
     }
     args = parser.parse_args()
-    with open(args.config, 'r', encoding='utf-8') as f:
+    with open(args.config, "r", encoding="utf-8") as f:
         training_conf = yaml.safe_load(f.read())
 
-    params = { **default_params, **training_conf }
-    params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])
-    params['num_train_epochs'] = int(params['num_train_epochs'])
-    params['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])
-    params['learning_rate'] = float(params['learning_rate'])
+    params = {**default_params, **training_conf}
+    params["gradient_accumulation_steps"] = int(params["gradient_accumulation_steps"])
+    params["num_train_epochs"] = int(params["num_train_epochs"])
+    params["per_device_train_batch_size"] = int(params["per_device_train_batch_size"])
+    params["learning_rate"] = float(params["learning_rate"])
     return params
 
 
-
 if __name__ == "__main__":
     from transformers import AutoModelForSequenceClassification
 
-    model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m')
+    model = AutoModelForSequenceClassification.from_pretrained("bigscience/bloomz-560m")
     freeze_top_n_layers(model, 10)
-    print(model.state_dict().keys())
\ No newline at end of file
+    print(model.state_dict().keys())

From 46892f67e7f6f54f1649783dc0afde29b27f020a Mon Sep 17 00:00:00 2001
From: Alex Ott <66271487+AlexanderHOtt@users.noreply.github.com>
Date: Sun, 1 Jan 2023 03:46:38 -0800
Subject: [PATCH 30/53] Add an error handler to the discord bot (#235)

* add error handler for the bot
---
 discord-bot/bot/bot.py | 77 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/discord-bot/bot/bot.py b/discord-bot/bot/bot.py
index a305946f..b2a2eb25 100644
--- a/discord-bot/bot/bot.py
+++ b/discord-bot/bot/bot.py
@@ -1,11 +1,14 @@
 # -*- coding: utf-8 -*-
 """Bot logic."""
+from datetime import datetime
+
 import aiosqlite
 import hikari
 import lightbulb
 import miru
 from bot.api_client import OasstApiClient
 from bot.settings import Settings
+from bot.utils import EMPTY, mention
 
 settings = Settings()
 
@@ -38,3 +41,77 @@ async def on_stopping(event: hikari.StoppingEvent):
     """Cleanup."""
     await bot.d.db.close()
     await bot.d.oasst_api.close()
+
+
+async def _send_error_embed(
+    content: str, exception: lightbulb.errors.LightbulbError | BaseException, ctx: lightbulb.Context
+) -> None:
+    ctx.command
+    embed = hikari.Embed(
+        title=f"`{exception.__class__.__name__}` Error{f' in `{ctx.command.name}`' if ctx.command else '' }",
+        description=content,
+        color=0xFF0000,
+        timestamp=datetime.now().astimezone(),
+    ).set_author(name=ctx.author.username, url=str(ctx.author.avatar_url))
+
+    await ctx.respond(EMPTY, embed=embed)
+
+
+@bot.listen(lightbulb.CommandErrorEvent)
+async def on_error(event: lightbulb.CommandErrorEvent) -> None:
+    """Error handler for the bot."""
+    # Unwrap the exception to get the original cause
+    exc = event.exception.__cause__ or event.exception
+    ctx = event.context
+
+    if isinstance(event.exception, lightbulb.CommandInvocationError):
+        if not event.context.command:
+            await _send_error_embed("Something went wrong", exc, ctx)
+        else:
+            await _send_error_embed(
+                f"Something went wrong during invocation of command `{event.context.command.name}`.", exc, ctx
+            )
+
+        raise event.exception
+
+    # Not an owner
+    if isinstance(exc, lightbulb.NotOwner):
+        await _send_error_embed("You are not the owner of this bot.", exc, ctx)
+    # Command is on cooldown
+    elif isinstance(exc, lightbulb.CommandIsOnCooldown):
+        await _send_error_embed(f"This command is on cooldown. Retry in `{exc.retry_after:.2f}` seconds.", exc, ctx)
+    # Missing permissions
+    elif isinstance(exc, lightbulb.errors.MissingRequiredPermission):
+        await _send_error_embed(
+            f"You do not have permission to use this command. Missing permissions: {exc.missing_perms}", exc, ctx
+        )
+    # Missing roles
+    elif isinstance(exc, lightbulb.errors.MissingRequiredRole):
+        assert event.context.guild_id is not None  # Roles only exist in guilds
+        await _send_error_embed(
+            f"You do not have the correct role to use this command. Missing role(s): {[mention(r, 'role') for r in exc.missing_roles]}",
+            exc,
+            ctx,
+        )
+    # Only a guild command
+    elif isinstance(exc, lightbulb.errors.OnlyInGuild):
+        await _send_error_embed("This command can only be run in servers.", exc, ctx)
+    # Only a DM command
+    elif isinstance(exc, lightbulb.errors.OnlyInDM):
+        await _send_error_embed("This command can only be run in DMs.", exc, ctx)
+    # Not enough arguments
+    elif isinstance(exc, lightbulb.errors.NotEnoughArguments):
+        await _send_error_embed(
+            f"Not enough arguments were supplied to the command. {[opt.name for opt in exc.missing_options]}", exc, ctx
+        )
+    # Bot missing permission
+    elif isinstance(exc, lightbulb.errors.BotMissingRequiredPermission):
+        await _send_error_embed(
+            f"The bot does not have the correct permission(s) to execute this command. Missing permissions: {exc.missing_perms}",
+            exc,
+            ctx,
+        )
+    elif isinstance(exc, lightbulb.errors.MissingRequiredAttachment):
+        await _send_error_embed("Not enough attachemnts were supplied to this command.", exc, ctx)
+    else:
+        raise exc

From 4d01704618e5588cd55a09857558355bf99abc10 Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 11:56:54 +0000
Subject: [PATCH 31/53] [fix] rerun pre-commit

---
 model/reward/instructor/README.md             |  11 +-
 model/reward/instructor/TODO.md               |  18 +--
 model/reward/instructor/cls_dataset.py        |  37 +++--
 .../configs/bloomz-560m-summary.yml           |   2 +-
 .../reward/instructor/configs/bloomz-560m.yml |   2 +-
 .../configs/electra-base-dis-webgpt.yml       |   2 +-
 .../instructor/configs/galactica-125m.yml     |   2 +-
 .../instructor/configs/galactica-1b.yml       |   2 +-
 .../test-galactica-125m-classification.yml    |   2 +-
 .../reward/instructor/experimental_dataset.py |  50 +++---
 model/reward/instructor/rank_datasets.py      | 104 ++++++------
 .../instructor/summary_quality_trainer.py     | 140 ++++++++++-------
 model/reward/instructor/tests/test_dataset.py |  27 ++--
 model/reward/instructor/trainer.py            | 148 +++++++++++-------
 model/reward/instructor/utils.py              |  84 +++++-----
 15 files changed, 337 insertions(+), 294 deletions(-)

diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 31c25371..73a872a0 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -2,7 +2,6 @@
 
 Trainer code based on huggingface. Compatible with deepspeed or accelerate
 
-
 Requirements
 
 ```
@@ -15,12 +14,10 @@ torch==1.12
 
 Start training reward model
 
-
 ```bash
 python trainer.py configs/electra-base-dis-webgpt.yml
 ```
 
-
 Additional axis labeling, this outputs a 4 summary quality evaluation metrics (score are normalized to 0-1 )
 
 ```bash
@@ -29,13 +26,13 @@ python summary_quality_trainer.py configs/test-bloomz-560m-quality.yml
 
 The four summary are :
 
-* overall
+- overall
 
-* accuracy
+- accuracy
 
-* coverage
+- coverage
 
-* coherence
+- coherence
 
 ## Dataset
 
diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index 1e653922..ed33b3c0 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -1,23 +1,19 @@
-
 Some other reward features we can use
 
-0. Finish classifcation feature 
+0. Finish classifcation feature
 
 1. Summaries from human feedback
 
-* use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
+- use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
 
-* each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
+- each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
 
-* ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)~~
-
-    * this should be placed under experimental_dataset.py
+- ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)~~
 
+  - this should be placed under experimental_dataset.py
 
 2. Add support for anthropic dataset
 
-* anthropic dataset is more like a conversation tree which is much complex than simply question-answer schema
-
-    * this is basically a MCTS from alphazero.
-
+- anthropic dataset is more like a conversation tree which is much complex than simply question-answer schema
 
+  - this is basically a MCTS from alphazero.
diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py
index ff824d19..09aa821b 100644
--- a/model/reward/instructor/cls_dataset.py
+++ b/model/reward/instructor/cls_dataset.py
@@ -1,32 +1,34 @@
-'''
+# -*- coding: utf-8 -*-
+"""
 
     classification based ranking
 
-'''
-import os
+"""
 import json
+import os
 import random
-import torch
-import numpy as np
+
 from dataset import load_dataset
 from torch.utils.data import Dataset
+
 from .utils import webgpt_return_format
 
+
 class WebGPTDataset(Dataset):
-    def __init__(self, mode='train', index_cache='dataset/webgpt_train_idx.pt', additional_dataset=None) -> None:
+    def __init__(self, mode="train", index_cache="dataset/webgpt_train_idx.pt", additional_dataset=None) -> None:
         super().__init__()
-        '''
+        """
             mode : train or val, used for validation purpose, has nothing to do with original split
             additional_dataset : a list of jsonline format with idx, question and texts (generate candidates)
                 idx : must match the index you iterate from comparison enumerate order
                 question : for validation purpose
                 texts : list of K generate results from the question prompt
-        '''
-        os.makedirs('dataset', exist_ok=True)
+        """
+        os.makedirs("dataset", exist_ok=True)
         dataset = load_dataset("openai/webgpt_comparisons")
         self.dataset = []
         self.dataset_index = []
-        for idx, row in enumerate(dataset['train']):
+        for idx, row in enumerate(dataset["train"]):
             self.dataset.append(webgpt_return_format(row))
 
         # since this dataset was generated from 176B GPT-3
@@ -36,17 +38,17 @@ class WebGPTDataset(Dataset):
         if additional_dataset is not None:
             self.sample_additional = True
             self.additional = {}
-            with open(additional_dataset, 'r') as f:
+            with open(additional_dataset, "r") as f:
                 for line in f:
                     row = json.loads(line)
-                    if row['idx'] in self.dataset_index:
-                        self.additional[row['idx']] = row['negatives']
+                    if row["idx"] in self.dataset_index:
+                        self.additional[row["idx"]] = row["negatives"]
             if len(self.additional) != len(self.dataset_index):
                 for match_idx in self.dataset_index:
                     if match_idx in self.additional:
                         continue
 
-                    idx = match_idx-900
+                    idx = match_idx - 900
                     while idx not in self.additional:
                         idx -= 1
                     self.additional[match_idx] = self.additional[idx]
@@ -57,10 +59,7 @@ class WebGPTDataset(Dataset):
     def __getitem__(self, index):
         row = self.dataset[index]
         if not self.sample_additional:
-            return row['question'], row['pos'], row['neg']
+            return row["question"], row["pos"], row["neg"]
 
         gen_neg = random.choice(self.additional[self.dataset_index[index]])
-        return row['question'], row['pos'], row['neg'], gen_neg
-
-
-
+        return row["question"], row["pos"], row["neg"], gen_neg
diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml
index a02f4e4a..55ed6cd1 100644
--- a/model/reward/instructor/configs/bloomz-560m-summary.yml
+++ b/model/reward/instructor/configs/bloomz-560m-summary.yml
@@ -6,4 +6,4 @@ max_length: 600
 freeze_layer: 12
 num_train_epochs: 2
 datasets:
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml
index c8f55746..bf3f14dd 100644
--- a/model/reward/instructor/configs/bloomz-560m.yml
+++ b/model/reward/instructor/configs/bloomz-560m.yml
@@ -7,4 +7,4 @@ freeze_layer: 12
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
index fc168b63..89200fe1 100644
--- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml
+++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
@@ -1,3 +1,3 @@
 model_name: google/electra-large-discriminator
 learning_rate: 3e-5
-max_length: 300
\ No newline at end of file
+max_length: 300
diff --git a/model/reward/instructor/configs/galactica-125m.yml b/model/reward/instructor/configs/galactica-125m.yml
index 55e093f5..13dbdfbe 100644
--- a/model/reward/instructor/configs/galactica-125m.yml
+++ b/model/reward/instructor/configs/galactica-125m.yml
@@ -10,4 +10,4 @@ max_length: 512
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml
index 5a094520..8ffd74e9 100644
--- a/model/reward/instructor/configs/galactica-1b.yml
+++ b/model/reward/instructor/configs/galactica-1b.yml
@@ -11,4 +11,4 @@ max_length: 400
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/configs/test-galactica-125m-classification.yml b/model/reward/instructor/configs/test-galactica-125m-classification.yml
index 1ad1f47c..e36efcf3 100644
--- a/model/reward/instructor/configs/test-galactica-125m-classification.yml
+++ b/model/reward/instructor/configs/test-galactica-125m-classification.yml
@@ -11,4 +11,4 @@ max_length: 128
 num_train_epochs: 2
 datasets:
   - webgpt
-  - hfsummary
\ No newline at end of file
+  - hfsummary
diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py
index 47d20d64..28f62967 100644
--- a/model/reward/instructor/experimental_dataset.py
+++ b/model/reward/instructor/experimental_dataset.py
@@ -1,4 +1,5 @@
-'''
+# -*- coding: utf-8 -*-
+"""
     HFSummary
 
         I want to train a multi regression model on axis_evals dataset mainly we can estimate the score of these score
@@ -7,15 +8,16 @@
 
         Should be better than just a preference score
 
-'''
-import torch
-from typing import Optional, Union
-import numpy as np
+"""
 from collections import defaultdict
-from datasets import load_dataset
 from dataclasses import dataclass
+from typing import Optional, Union
+
+import numpy as np
+import torch
+from datasets import load_dataset
 from torch.utils.data import Dataset
-from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase
 
 
 @dataclass
@@ -25,12 +27,13 @@ class DataCollatorForSummaryScore:
     Data collator that will dynamically pad the inputs for multiple choice received.
 
     """
+
     tokenizer: PreTrainedTokenizerBase
     num_choices: int = 2
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False # galactica
+    drop_token_type: bool = False  # galactica
 
     def __call__(self, batch):
 
@@ -48,17 +51,17 @@ class DataCollatorForSummaryScore:
             return_tensors="pt",
         )
         if self.drop_token_type:
-            batch_feature.pop('token_type_ids')
+            batch_feature.pop("token_type_ids")
         # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
-        batch_feature['labels'] = torch.from_numpy(np.array(labels)).float()
+        batch_feature["labels"] = torch.from_numpy(np.array(labels)).float()
         return batch_feature
 
 
 class HFSummaryQuality(Dataset):
     def __init__(self, split, tokenizer, max_length=300) -> None:
         super().__init__()
-        assert split in ('validation', 'test')
-        dataset = load_dataset('Tristan/summarize_from_feedback', 'axis')[split]
+        assert split in ("validation", "test")
+        dataset = load_dataset("Tristan/summarize_from_feedback", "axis")[split]
         self.max_length = max_length
         mean_scores = defaultdict(list)
         self.contexts = []
@@ -66,22 +69,21 @@ class HFSummaryQuality(Dataset):
         self.labels = []
         for data in dataset:
 
-            if 'article' in data['info'] and \
-                data['info']['article'] is not None:
-                context = data['info']['article']
-            elif 'post' in data['info']:
-                context = data['info']['post']
+            if "article" in data["info"] and data["info"]["article"] is not None:
+                context = data["info"]["article"]
+            elif "post" in data["info"]:
+                context = data["info"]["post"]
             self.contexts.append(context)
 
-            response = data['summary']['text']
+            response = data["summary"]["text"]
             self.responses.append(response)
-            self.labels.append(data['summary']['axes'])
-            for axis, score in data['summary']['axes'].items():
+            self.labels.append(data["summary"]["axes"])
+            for axis, score in data["summary"]["axes"].items():
                 if score is not None:
                     mean_scores[axis].append(score)
 
-        self.label2idx = { key: idx for idx, key in enumerate(mean_scores.keys()) }
-        self.label2mean = { key: np.mean(scores) for key, scores in mean_scores.items() }
+        self.label2idx = {key: idx for idx, key in enumerate(mean_scores.keys())}
+        self.label2mean = {key: np.mean(scores) for key, scores in mean_scores.items()}
         self.tokenizer = tokenizer
         print(self.label2idx)
 
@@ -94,7 +96,5 @@ class HFSummaryQuality(Dataset):
         response = self.responses[index]
         labels = np.zeros(len(self.label2idx))
         for key, score in self.labels[index].items():
-            labels[self.label2idx[key]] = (self.label2mean[key] if score is None else score)/10
+            labels[self.label2idx[key]] = (self.label2mean[key] if score is None else score) / 10
         return self.tokenizer(context, response, truncation=True, max_length=self.max_length), labels
-
-
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index f38885e4..99ba9955 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -1,4 +1,5 @@
-'''
+# -*- coding: utf-8 -*-
+"""
     author: theblackcat102
 
     Dataset output format from __getitem__
@@ -17,13 +18,15 @@
         inferior than the human perference one
 
 
-'''
-from typing import Optional, Union
+"""
 from dataclasses import dataclass
+from typing import Optional, Union
+
 import numpy as np
-from torch.utils.data import Dataset
 from datasets import load_dataset
-from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
+from torch.utils.data import Dataset
+from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase
+
 
 @dataclass
 class DataCollatorForPairRank:
@@ -32,12 +35,13 @@ class DataCollatorForPairRank:
     Data collator that will dynamically pad the inputs for multiple choice received.
 
     """
+
     tokenizer: PreTrainedTokenizerBase
     num_choices: int = 2
     padding: Union[bool, str, PaddingStrategy] = True
     max_length: Optional[int] = None
     pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False # galactica
+    drop_token_type: bool = False  # galactica
 
     def __call__(self, features):
 
@@ -45,12 +49,10 @@ class DataCollatorForPairRank:
         batch_size = 0
         for question, pairs in features:
             for (pos, neg) in pairs:
-                flatten_features.append(self.tokenizer(question, pos,
-                    truncation=True, max_length=self.max_length))
-                flatten_features.append(self.tokenizer(question, neg,
-                    truncation=True, max_length=self.max_length))
+                flatten_features.append(self.tokenizer(question, pos, truncation=True, max_length=self.max_length))
+                flatten_features.append(self.tokenizer(question, neg, truncation=True, max_length=self.max_length))
                 batch_size += 1
-        
+
         batch = self.tokenizer.pad(
             flatten_features,
             padding=self.padding,
@@ -59,13 +61,12 @@ class DataCollatorForPairRank:
             return_tensors="pt",
         )
         if self.drop_token_type:
-            batch.pop('token_type_ids')
+            batch.pop("token_type_ids")
         # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()}
         return batch
 
 
 class WebGPT(Dataset):
-
     def __init__(self) -> None:
         super().__init__()
 
@@ -74,23 +75,19 @@ class WebGPT(Dataset):
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2question = {}
-        for row in dataset['train']:
-            question = row['question']['full_text']
+        for row in dataset["train"]:
+            question = row["question"]["full_text"]
             if question not in self.index2question:
                 self.index2question[len(self.index2question)] = question
 
             if question not in questions:
                 questions[question] = []
 
-            if row['score_0'] > row['score_1']:
+            if row["score_0"] > row["score_1"]:
                 # not going to risk it
-                questions[question].append((
-                    row['answer_0'], row['answer_1']
-                ))
+                questions[question].append((row["answer_0"], row["answer_1"]))
             else:
-                questions[question].append((
-                    row['answer_1'], row['answer_0']
-                ))
+                questions[question].append((row["answer_1"], row["answer_0"]))
 
         self.questions = questions
 
@@ -104,61 +101,55 @@ class WebGPT(Dataset):
         return question, rows
 
 
-
-
 class HFSummary(Dataset):
-    '''
-        Human feedback data from OpenAI
-        https://github.com/openai/summarize-from-feedback
-        
-        labeling method : pair comparison, 0 or 1
+    """
+    Human feedback data from OpenAI
+    https://github.com/openai/summarize-from-feedback
 
-    '''
-    def __init__(self, split='train',
-        conf_threshold=-1,
-        max_comparison_per_sample=3) -> None:
+    labeling method : pair comparison, 0 or 1
+
+    """
+
+    def __init__(self, split="train", conf_threshold=-1, max_comparison_per_sample=3) -> None:
         super().__init__()
-        assert split in ('train', 'valid1', 'valid2', 'test')
+        assert split in ("train", "valid1", "valid2", "test")
         summaries = {}
         # using prompt as our index will allows us
         # to add additional generated prompt later
         self.index2summary = {}
         self.max_comparison_per_sample = max_comparison_per_sample
-        major_split = split if 'train' == split else 'validation'
-        dataset = load_dataset('Tristan/summarize_from_feedback', 'comparisons')[major_split]
+        major_split = split if "train" == split else "validation"
+        dataset = load_dataset("Tristan/summarize_from_feedback", "comparisons")[major_split]
         for data in dataset:
-            if 'extra' in data and \
-                'confidence' in data['extra'] and \
-                data['extra']['confidence'] is not None and \
-                conf_threshold > data['extra']['confidence']:
-                print('skipping {}'.format(data['info']['id']))
+            if (
+                "extra" in data
+                and "confidence" in data["extra"]
+                and data["extra"]["confidence"] is not None
+                and conf_threshold > data["extra"]["confidence"]
+            ):
+                print("skipping {}".format(data["info"]["id"]))
                 continue
 
-            if split != 'train' and split != data['split']:
+            if split != "train" and split != data["split"]:
                 continue
 
-            if 'article' in data['info'] and \
-                data['info']['article'] is not None:
-                context = data['info']['article']
-            elif 'post' in data['info']:
-                context = data['info']['post']
-
+            if "article" in data["info"] and data["info"]["article"] is not None:
+                context = data["info"]["article"]
+            elif "post" in data["info"]:
+                context = data["info"]["post"]
 
             if context not in self.index2summary:
                 self.index2summary[len(self.index2summary)] = context
-            
+
             if context not in summaries:
                 summaries[context] = []
 
-            pos, neg = (0, 1) if data['choice'] == 0 else (1, 0)
-            summaries[context].append((
-                data['summaries'][pos]['text'],
-                data['summaries'][neg]['text']
-            ))
+            pos, neg = (0, 1) if data["choice"] == 0 else (1, 0)
+            summaries[context].append((data["summaries"][pos]["text"], data["summaries"][neg]["text"]))
 
         self.summaries = summaries
 
-        self.postfix_prompt = ' TLDR;'
+        self.postfix_prompt = " TLDR;"
 
     def __len__(self):
         return len(self.index2summary)
@@ -172,5 +163,4 @@ class HFSummary(Dataset):
         # not optimal but good for now
         valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample)
         # optimize the format later
-        return context+self.postfix_prompt, [ r for idx, r in enumerate(rows) if idx in valid_idx ]
-
+        return context + self.postfix_prompt, [r for idx, r in enumerate(rows) if idx in valid_idx]
diff --git a/model/reward/instructor/summary_quality_trainer.py b/model/reward/instructor/summary_quality_trainer.py
index a6604819..88bf1abf 100644
--- a/model/reward/instructor/summary_quality_trainer.py
+++ b/model/reward/instructor/summary_quality_trainer.py
@@ -1,46 +1,72 @@
+# -*- coding: utf-8 -*-
 import os
-os.environ['WANDB_PROJECT'] = 'quality-scoring'
-import torch
-import yaml
-import evaluate
-from typing import Any, Callable, List, Optional, Tuple, Union, Dict
-from torch import nn
 from argparse import ArgumentParser
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import evaluate
 import numpy as np
+import torch
+from experimental_dataset import DataCollatorForSummaryScore, HFSummaryQuality
+from torch import nn
 from torch.utils.data import Dataset
-from transformers import AutoModelForSequenceClassification
-from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
-from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
-from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
+from transformers import (
+    AutoModelForSequenceClassification,
+    DataCollator,
+    EvalPrediction,
+    PreTrainedModel,
+    PreTrainedTokenizerBase,
+    Trainer,
+    TrainerCallback,
+    TrainingArguments,
+)
+from utils import argument_parsing, freeze_top_n_layers, get_tokenizer
+
+os.environ["WANDB_PROJECT"] = "quality-scoring"
 
 parser = ArgumentParser()
-parser.add_argument('config', type=str)
+parser.add_argument("config", type=str)
 
 accuracy = evaluate.load("mse")
+
+
 def compute_metrics(eval_pred):
     predictions, labels = eval_pred
     return accuracy.compute(predictions=predictions.flatten(), references=labels.flatten())
 
 
 class QualityTrainer(Trainer):
-    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
-                 args: TrainingArguments = None,
-                 data_collator: Optional[DataCollator] = None,
-                 train_dataset: Optional[Dataset] = None,
-                 eval_dataset: Optional[Dataset] = None,
-                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
-                 model_init: Callable[[], PreTrainedModel] = None,
-                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-                 callbacks: Optional[List[TrainerCallback]] = None,
-                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
-                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
-        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
-                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
+    def __init__(
+        self,
+        model: Union[PreTrainedModel, nn.Module] = None,
+        args: TrainingArguments = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Dataset] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        model_init: Callable[[], PreTrainedModel] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+        callbacks: Optional[List[TrainerCallback]] = None,
+        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+    ):
+        super().__init__(
+            model,
+            args,
+            data_collator,
+            train_dataset,
+            eval_dataset,
+            tokenizer,
+            model_init,
+            compute_metrics,
+            callbacks,
+            optimizers,
+            preprocess_logits_for_metrics,
+        )
         self.loss_fct = nn.L1Loss()
         self.sigmoid = nn.Sigmoid()
 
     def compute_loss(self, model, inputs, return_outputs=False):
-        labels = inputs.pop('labels')
+        labels = inputs.pop("labels")
         # forward pass
         outputs = model(**inputs)
         logits = self.sigmoid(outputs.get("logits"))
@@ -50,75 +76,73 @@ class QualityTrainer(Trainer):
 
     def _compute_loss(self, model, inputs):
         inputs = self._prepare_inputs(inputs)
-        labels = inputs.pop('labels')
+        labels = inputs.pop("labels")
         outputs = model(**inputs)
         logits = self.sigmoid(outputs.get("logits"))
         loss = self.loss_fct(logits, labels)
 
         return loss, logits
 
-    def prediction_step(self, model: nn.Module,
-            inputs: Dict[str, Union[torch.Tensor, Any]],
-            prediction_loss_only: bool,
-            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(
+        self,
+        model: nn.Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[List[str]] = None,
+    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
 
         with torch.no_grad():
             # compute loss on predict data
             loss, logits = self._compute_loss(model, inputs)
 
         loss = loss.mean().detach()
-        labels = inputs['labels']
+        labels = inputs["labels"]
         if self.args.prediction_loss_only:
             return (loss, None, None)
 
         return (loss, logits, labels)
 
+
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
 
-    model_name = training_conf['model_name']
+    model_name = training_conf["model_name"]
     tokenizer = get_tokenizer(model_name)
-    collate_fn = DataCollatorForSummaryScore(tokenizer,
-        max_length=training_conf['max_length'],
-        drop_token_type= 'galactica' in model_name
+    collate_fn = DataCollatorForSummaryScore(
+        tokenizer, max_length=training_conf["max_length"], drop_token_type="galactica" in model_name
+    )
+    train = HFSummaryQuality(split="validation", tokenizer=tokenizer, max_length=training_conf["max_length"])
+    eval = HFSummaryQuality(split="test", tokenizer=tokenizer, max_length=training_conf["max_length"])
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_name, num_labels=len(train.label2idx), problem_type="regression"
     )
-    train = HFSummaryQuality(split='validation',
-        tokenizer=tokenizer,
-        max_length=training_conf['max_length']
-        )
-    eval = HFSummaryQuality(split='test',
-            tokenizer=tokenizer,
-            max_length=training_conf['max_length']
-        )
-    model = AutoModelForSequenceClassification.from_pretrained(model_name,
-        num_labels=len(train.label2idx), problem_type='regression')
 
-    if 'freeze_layer' in training_conf:
-        num_layer = training_conf['freeze_layer']
+    if "freeze_layer" in training_conf:
+        num_layer = training_conf["freeze_layer"]
         model = freeze_top_n_layers(model, num_layer)
         model_parameters = filter(lambda p: p.requires_grad, model.parameters())
         params = sum([np.prod(p.size()) for p in model_parameters])
-        print('Number of trainable : {}M'.format(int(params/1e6)))
+        print("Number of trainable : {}M".format(int(params / 1e6)))
 
     args = TrainingArguments(
         output_dir=f"{model_name}-finetuned",
-        num_train_epochs=training_conf['num_train_epochs'],
+        num_train_epochs=training_conf["num_train_epochs"],
         warmup_steps=500,
-        learning_rate=training_conf['learning_rate'],
+        learning_rate=training_conf["learning_rate"],
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=training_conf['gradient_checkpointing'],
-        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
-        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
-        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
+        gradient_checkpointing=training_conf["gradient_checkpointing"],
+        gradient_accumulation_steps=training_conf["gradient_accumulation_steps"],
+        per_device_train_batch_size=training_conf["per_device_train_batch_size"],
+        per_device_eval_batch_size=training_conf["per_device_eval_batch_size"],
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
-        evaluation_strategy='steps',
-        eval_steps=training_conf['eval_steps'],
+        evaluation_strategy="steps",
+        eval_steps=training_conf["eval_steps"],
         save_steps=1000,
-        report_to='wandb'
+        report_to="wandb",
     )
     trainer = QualityTrainer(
         model,
@@ -127,6 +151,6 @@ if __name__ == "__main__":
         eval_dataset=eval,
         data_collator=collate_fn,
         tokenizer=tokenizer,
-        compute_metrics=compute_metrics
+        compute_metrics=compute_metrics,
     )
     trainer.train()
diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py
index 271db83c..f367a50d 100644
--- a/model/reward/instructor/tests/test_dataset.py
+++ b/model/reward/instructor/tests/test_dataset.py
@@ -1,40 +1,41 @@
-from transformers import AutoTokenizer
+# -*- coding: utf-8 -*-
+from experimental_dataset import DataCollatorForSummaryScore, HFSummaryQuality
+from rank_datasets import DataCollatorForPairRank, HFSummary, WebGPT
 from torch.utils.data import DataLoader
-from rank_datasets import WebGPT, HFSummary, DataCollatorForPairRank
-from experimental_dataset import HFSummaryQuality, DataCollatorForSummaryScore
+from transformers import AutoTokenizer
+
 
 def test_hfsummary():
-    
+
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
-    dataset = HFSummary('train')
+    dataset = HFSummary("train")
     print(len(dataset))
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8)
     for batch in dataloader:
-        batch['input_ids'].shape
- 
+        batch["input_ids"].shape
+
 
 def test_webgpt():
-    
+
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForPairRank(tokenizer, max_length=200)
     dataset = WebGPT()
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:
-        print(batch['input_ids'].shape)
+        print(batch["input_ids"].shape)
 
 
 def test_hf_quality():
 
     tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
     collate_fn = DataCollatorForSummaryScore(tokenizer, max_length=200)
-    dataset = HFSummaryQuality('validation', tokenizer)
+    dataset = HFSummaryQuality("validation", tokenizer)
     dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32)
     for batch in dataloader:
-        print(batch['input_ids'].shape)
-
+        print(batch["input_ids"].shape)
 
 
 if __name__ == "__main__":
     test_hf_quality()
-    # test_webgpt()
\ No newline at end of file
+    # test_webgpt()
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index c8063cf7..0e98e4c5 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -1,32 +1,44 @@
+# -*- coding: utf-8 -*-
 import os
-os.environ['WANDB_PROJECT'] = 'reward-model'
-import torch
-import yaml
-import evaluate
-from typing import Any, Callable, List, Optional, Tuple, Union, Dict
-from torch import nn
 from argparse import ArgumentParser
-import numpy as np
 from dataclasses import dataclass
-from torch.utils.data import Dataset, ConcatDataset
-from transformers import AutoModelForSequenceClassification
-from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase
-from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary
-from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import evaluate
+import numpy as np
+import torch
+from rank_datasets import DataCollatorForPairRank, HFSummary, WebGPT
+from torch import nn
+from torch.utils.data import ConcatDataset, Dataset
+from transformers import (
+    AutoModelForSequenceClassification,
+    DataCollator,
+    EvalPrediction,
+    PreTrainedModel,
+    PreTrainedTokenizerBase,
+    Trainer,
+    TrainerCallback,
+    TrainingArguments,
+)
+from utils import argument_parsing, freeze_top_n_layers, get_tokenizer, train_val_dataset
+
+os.environ["WANDB_PROJECT"] = "reward-model"
 
 accuracy = evaluate.load("accuracy")
 parser = ArgumentParser()
-parser.add_argument('config', type=str)
+parser.add_argument("config", type=str)
+
 
 @dataclass
 class CustomTrainingArguments(TrainingArguments):
-    loss_function: str='rank'
+    loss_function: str = "rank"
 
 
 def compute_metrics(eval_pred):
     predictions, _ = eval_pred
     predictions = np.argmax(predictions, axis=1)
-    return accuracy.compute(predictions=predictions, references=[0]*predictions.shape[0])
+    return accuracy.compute(predictions=predictions, references=[0] * predictions.shape[0])
+
 
 class RankLoss(nn.Module):
     def __init__(self, eps=1e-8) -> None:
@@ -39,27 +51,41 @@ class RankLoss(nn.Module):
 
 
 class RankTrainer(Trainer):
-    def __init__(self, model: Union[PreTrainedModel, nn.Module] = None,
-                 args: TrainingArguments = None,
-                 data_collator: Optional[DataCollator] = None,
-                 train_dataset: Optional[Dataset] = None,
-                 eval_dataset: Optional[Dataset] = None,
-                 tokenizer: Optional[PreTrainedTokenizerBase] = None,
-                 model_init: Callable[[], PreTrainedModel] = None,
-                 compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
-                 callbacks: Optional[List[TrainerCallback]] = None,
-                 optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
-                 preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None):
-        super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer,
-                         model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics)
-        self.loss_fct = RankLoss() if args.loss_function == 'rank' else nn.CrossEntropyLoss()
+    def __init__(
+        self,
+        model: Union[PreTrainedModel, nn.Module] = None,
+        args: TrainingArguments = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Dataset] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        model_init: Callable[[], PreTrainedModel] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
+        callbacks: Optional[List[TrainerCallback]] = None,
+        optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None,
+    ):
+        super().__init__(
+            model,
+            args,
+            data_collator,
+            train_dataset,
+            eval_dataset,
+            tokenizer,
+            model_init,
+            compute_metrics,
+            callbacks,
+            optimizers,
+            preprocess_logits_for_metrics,
+        )
+        self.loss_fct = RankLoss() if args.loss_function == "rank" else nn.CrossEntropyLoss()
         self.loss_function = args.loss_function
 
     def compute_loss(self, model, inputs, return_outputs=False):
         # forward pass
         outputs = model(**inputs)
         logits = outputs.get("logits").view(-1, 2)
-        if self.loss_function == 'rank':
+        if self.loss_function == "rank":
             loss = self.loss_fct(logits[:, 0], logits[:, 1])
         else:
             loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
@@ -70,17 +96,20 @@ class RankTrainer(Trainer):
         inputs = self._prepare_inputs(inputs)
         outputs = model(**inputs)
         logits = outputs.get("logits").view(-1, 2)
-        if self.loss_function == 'rank':
+        if self.loss_function == "rank":
             loss = self.loss_fct(logits[:, 0], logits[:, 1])
         else:
             loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long))
 
         return loss, logits
 
-    def prediction_step(self, model: nn.Module,
-            inputs: Dict[str, Union[torch.Tensor, Any]],
-            prediction_loss_only: bool,
-            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(
+        self,
+        model: nn.Module,
+        inputs: Dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[List[str]] = None,
+    ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
 
         with torch.no_grad():
             # compute loss on predict data
@@ -93,54 +122,57 @@ class RankTrainer(Trainer):
 
         return (loss, logits, labels)
 
+
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
 
-    model_name = training_conf['model_name']
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
-    if 'freeze_layer' in training_conf:
-        num_layer = training_conf['freeze_layer']
+    model_name = training_conf["model_name"]
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type="regression")
+    if "freeze_layer" in training_conf:
+        num_layer = training_conf["freeze_layer"]
         model = freeze_top_n_layers(model, num_layer)
         model_parameters = filter(lambda p: p.requires_grad, model.parameters())
         params = sum([np.prod(p.size()) for p in model_parameters])
-        print('Number of trainable : {}M'.format(int(params/1e6)))
+        print("Number of trainable : {}M".format(int(params / 1e6)))
 
     tokenizer = get_tokenizer(model_name)
     args = CustomTrainingArguments(
         output_dir=f"{model_name}-finetuned",
-        num_train_epochs=training_conf['num_train_epochs'],
+        num_train_epochs=training_conf["num_train_epochs"],
         warmup_steps=500,
-        loss_function=training_conf['loss'],
-        learning_rate=training_conf['learning_rate'],
+        loss_function=training_conf["loss"],
+        learning_rate=training_conf["learning_rate"],
         # half_precision_backend="apex",
         fp16=True,
-        gradient_checkpointing=training_conf['gradient_checkpointing'],
-        gradient_accumulation_steps=training_conf['gradient_accumulation_steps'],
-        per_device_train_batch_size=training_conf['per_device_train_batch_size'],
-        per_device_eval_batch_size=training_conf['per_device_eval_batch_size'],
+        gradient_checkpointing=training_conf["gradient_checkpointing"],
+        gradient_accumulation_steps=training_conf["gradient_accumulation_steps"],
+        per_device_train_batch_size=training_conf["per_device_train_batch_size"],
+        per_device_eval_batch_size=training_conf["per_device_eval_batch_size"],
         weight_decay=0.01,
         max_grad_norm=2.0,
         logging_steps=10,
         save_total_limit=4,
-        evaluation_strategy='steps',
-        eval_steps=training_conf['eval_steps'],
+        evaluation_strategy="steps",
+        eval_steps=training_conf["eval_steps"],
         save_steps=1000,
-        report_to='wandb'
+        report_to="wandb",
     )
     train_datasets, evals = [], {}
-    if 'webgpt' in training_conf['datasets']:
+    if "webgpt" in training_conf["datasets"]:
         web_dataset = WebGPT()
         train, eval = train_val_dataset(web_dataset)
         train_datasets.append(train)
-        evals['webgpt'] = eval
-    if 'hfsummary' in training_conf['datasets']:
-        sum_train = HFSummary(split='train')
+        evals["webgpt"] = eval
+    if "hfsummary" in training_conf["datasets"]:
+        sum_train = HFSummary(split="train")
         train_datasets.append(sum_train)
-        sum_eval = HFSummary(split='valid1')
+        sum_eval = HFSummary(split="valid1")
         assert len(sum_eval) > 0
-        evals['hfsummary'] = sum_eval
+        evals["hfsummary"] = sum_eval
     train = ConcatDataset(train_datasets)
-    collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name)
+    collate_fn = DataCollatorForPairRank(
+        tokenizer, max_length=training_conf["max_length"], drop_token_type="galactica" in model_name
+    )
     assert len(evals) > 0
     trainer = RankTrainer(
         model,
@@ -149,6 +181,6 @@ if __name__ == "__main__":
         eval_dataset=eval,
         data_collator=collate_fn,
         tokenizer=tokenizer,
-        compute_metrics=compute_metrics
+        compute_metrics=compute_metrics,
     )
     trainer.train()
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index d59bb13c..9441ddb9 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -1,96 +1,100 @@
+# -*- coding: utf-8 -*-
 import re
+
 import yaml
-from torch.utils.data import Subset
 from sklearn.model_selection import train_test_split
+from torch.utils.data import Subset
 from transformers import AutoTokenizer
 
-re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]')
+re_reference_remove = re.compile(r"\[([0-9])+\]|\[([0-9])+,([0-9])+\]")
+
 
 def webgpt_return_format(row):
-    if row['score_0'] >= row['score_1']:
+    if row["score_0"] >= row["score_1"]:
         # remove this to prevent information leak, since we are not using reference
         return {
-                'question': row['question']['full_text'],
-                     'pos': re_reference_remove.sub('', row['answer_0']),
-                     'neg': re_reference_remove.sub('', row['answer_1'])
-                }
+            "question": row["question"]["full_text"],
+            "pos": re_reference_remove.sub("", row["answer_0"]),
+            "neg": re_reference_remove.sub("", row["answer_1"]),
+        }
 
     return {
-            'question': row['question']['full_text'],
-                 'pos': re_reference_remove.sub('', row['answer_1']),
-                 'neg': re_reference_remove.sub('', row['answer_0'])
-            }
+        "question": row["question"]["full_text"],
+        "pos": re_reference_remove.sub("", row["answer_1"]),
+        "neg": re_reference_remove.sub("", row["answer_0"]),
+    }
 
 
 def get_tokenizer(tokenizer_name):
     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
-    if 'galactica' in tokenizer_name:
-        tokenizer.add_special_tokens({'pad_token':'<pad>', 'eos_token': '</s>' })
+    if "galactica" in tokenizer_name:
+        tokenizer.add_special_tokens({"pad_token": "<pad>", "eos_token": "</s>"})
 
     return tokenizer
 
 
-
 def train_val_dataset(dataset, val_split=0.2):
-    train_idx, val_idx = train_test_split(list(range(len(dataset))), 
-        test_size=val_split, random_state=666, shuffle=True)
+    train_idx, val_idx = train_test_split(
+        list(range(len(dataset))), test_size=val_split, random_state=666, shuffle=True
+    )
     # [3879, 11479, 8341, 9177, 10798, 18177, 5735, 15669, 4837, 2760]
     print(val_idx[:10])
     # [13582, 5919, 11875, 7373, 19135, 13706, 8555, 15788, 15005, 15209]
     print(train_idx[:10])
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
+
 def freeze_top_n_layers(model, target_layers):
     # its possible we can simply detect which module is a ModuleList
     # and simply freeze the module without doing string parsing
     for name, param in model.named_parameters():
-        if 'embed' in name:
+        if "embed" in name:
             param.requires_grad = False
-        elif '.layer' in name or '.h.' in name:
-            tokens = name.split('.')
+        elif ".layer" in name or ".h." in name:
+            tokens = name.split(".")
             idx = 0
             for token in tokens:
-                if 'layer' in token or token == 'h':
+                if "layer" in token or token == "h":
                     break
                 idx += 1
             if idx >= len(tokens):
                 continue
 
-            layer_ = int(tokens[idx+1])
+            layer_ = int(tokens[idx + 1])
             if layer_ < target_layers:
                 # print('freeze ', layer_, name)
                 param.requires_grad = False
     return model
 
+
 def argument_parsing(parser):
     default_params = {
-        'num_train_epochs': 4,
-        'learning_rate': 3e-5,
-        'eval_steps': 500,
-        'loss': 'rank',
-        'max_length': 440,
-        'per_device_eval_batch_size': 5,
-        'per_device_train_batch_size': 8,
-        'gradient_accumulation_steps': 8,
-        'gradient_checkpointing': False,
-        'datasets': ['webgpt']
+        "num_train_epochs": 4,
+        "learning_rate": 3e-5,
+        "eval_steps": 500,
+        "loss": "rank",
+        "max_length": 440,
+        "per_device_eval_batch_size": 5,
+        "per_device_train_batch_size": 8,
+        "gradient_accumulation_steps": 8,
+        "gradient_checkpointing": False,
+        "datasets": ["webgpt"],
     }
     args = parser.parse_args()
-    with open(args.config, 'r', encoding='utf-8') as f:
+    with open(args.config, "r", encoding="utf-8") as f:
         training_conf = yaml.safe_load(f.read())
 
-    params = { **default_params, **training_conf }
-    params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps'])
-    params['num_train_epochs'] = int(params['num_train_epochs'])
-    params['per_device_train_batch_size'] = int(params['per_device_train_batch_size'])
-    params['learning_rate'] = float(params['learning_rate'])
+    params = {**default_params, **training_conf}
+    params["gradient_accumulation_steps"] = int(params["gradient_accumulation_steps"])
+    params["num_train_epochs"] = int(params["num_train_epochs"])
+    params["per_device_train_batch_size"] = int(params["per_device_train_batch_size"])
+    params["learning_rate"] = float(params["learning_rate"])
     return params
 
 
-
 if __name__ == "__main__":
     from transformers import AutoModelForSequenceClassification
 
-    model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m')
+    model = AutoModelForSequenceClassification.from_pretrained("bigscience/bloomz-560m")
     freeze_top_n_layers(model, 10)
-    print(model.state_dict().keys())
\ No newline at end of file
+    print(model.state_dict().keys())

From 28e0b4f77020ea9cd5317bbc4094c9008083a99f Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 12:03:34 +0000
Subject: [PATCH 32/53] [fix] Revert deleted vscode

---
 .vscode/settings.json                    |   4 +
 model/reward/instructor/requirements.txt | 136 +----------------------
 2 files changed, 5 insertions(+), 135 deletions(-)
 create mode 100644 .vscode/settings.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..4c58a32f
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+  "python.formatting.provider": "autopep8",
+  "python.analysis.extraPaths": ["${workspaceFolder}/oasst-shared"]
+}
diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt
index 9fc45917..cb1a9e68 100644
--- a/model/reward/instructor/requirements.txt
+++ b/model/reward/instructor/requirements.txt
@@ -1,140 +1,6 @@
-aiohttp==3.8.3
-aiosignal==1.3.1
-anyio==3.6.2
-argon2-cffi==21.3.0
-argon2-cffi-bindings==21.2.0
-arrow==1.2.3
-asttokens==2.2.1
-async-timeout==4.0.2
-attrs==22.2.0
-autopep8==2.0.1
-backcall==0.2.0
-beautifulsoup4==4.11.1
-bleach==5.0.1
-certifi==2022.12.7
-cffi==1.15.1
-charset-normalizer==2.1.1
-click==8.1.3
-comm==0.1.2
 datasets==2.8.0
-debugpy==1.6.4
-decorator==5.1.1
-defusedxml==0.7.1
-dill==0.3.6
-docker-pycreds==0.4.0
-entrypoints==0.4
 evaluate==0.4.0
-exceptiongroup==1.1.0
-executing==1.2.0
-fastjsonschema==2.16.2
-filelock==3.9.0
-fqdn==1.5.1
-frozenlist==1.3.3
-fsspec==2022.11.0
-gitdb==4.0.10
-GitPython==3.1.30
-huggingface-hub==0.11.1
-idna==3.4
-iniconfig==1.1.1
-ipykernel==6.19.4
-ipython==8.7.0
-ipython-genutils==0.2.0
-ipywidgets==8.0.4
-isoduration==20.11.0
-jedi==0.18.2
-Jinja2==3.1.2
-joblib==1.2.0
-jsonpointer==2.3
-jsonschema==4.17.3
-jupyter==1.0.0
-jupyter-console==6.4.4
-jupyter-events==0.5.0
-jupyter_client==7.4.8
-jupyter_core==5.1.1
-jupyter_server==2.0.6
-jupyter_server_terminals==0.4.3
-jupyterlab-pygments==0.2.2
-jupyterlab-widgets==3.0.5
-lightning-utilities==0.5.0
-MarkupSafe==2.1.1
-matplotlib-inline==0.1.6
-mistune==2.0.4
-multidict==6.0.4
-multiprocess==0.70.14
-nbclassic==0.4.8
-nbclient==0.7.2
-nbconvert==7.2.7
-nbformat==5.7.1
-nest-asyncio==1.5.6
-notebook==6.5.2
-notebook_shim==0.2.2
-numpy==1.24.1
-packaging==22.0
-pandas==1.5.2
-pandocfilters==1.5.0
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-platformdirs==2.6.2
-pluggy==1.0.0
-prometheus-client==0.15.0
-promise==2.3
-prompt-toolkit==3.0.36
-protobuf==3.20.1
-psutil==5.9.4
-ptyprocess==0.7.0
-pure-eval==0.2.2
-pyarrow==10.0.1
-pycodestyle==2.10.0
-pycparser==2.21
-Pygments==2.13.0
-pyrsistent==0.19.3
-pytest==7.2.0
-python-dateutil==2.8.2
-python-json-logger==2.0.4
-pytorch-lightning==1.8.6
-pytz==2022.7
-PyYAML==6.0
-pyzmq==24.0.1
-qtconsole==5.4.0
-QtPy==2.3.0
-regex==2022.10.31
-requests==2.28.1
-responses==0.18.0
-rfc3339-validator==0.1.4
-rfc3986-validator==0.1.1
 scikit-learn==1.2.0
-scipy==1.9.3
-Send2Trash==1.8.0
-sentry-sdk==1.12.1
-setproctitle==1.3.2
-shortuuid==1.0.11
-six==1.16.0
-smmap==5.0.0
-sniffio==1.3.0
-soupsieve==2.3.2.post1
-stack-data==0.6.2
-tensorboardX==2.5.1
-terminado==0.17.1
-threadpoolctl==3.1.0
-tinycss2==1.2.1
-tokenizers==0.13.2
-tomli==2.0.1
 torch==1.12.1+cu116
-torchmetrics==0.11.0
-tornado==6.2
-tqdm==4.64.1
-traitlets==5.8.0
 transformers==4.25.1
-typing_extensions==4.4.0
-uri-template==1.2.0
-urllib3==1.26.13
-wandb==0.13.7
-wcwidth==0.2.5
-webcolors==1.12
-webencodings==0.5.1
-websocket-client==1.4.2
-widgetsnbextension==4.0.5
-xxhash==3.2.0
-yarl==1.8.2
+wandb==0.13.7
\ No newline at end of file

From 8f0028bc44133af8bd54c301fa8546d56cadc2bf Mon Sep 17 00:00:00 2001
From: theblackcat102 <theblackcat102@github.com>
Date: Sun, 1 Jan 2023 13:28:48 +0000
Subject: [PATCH 33/53] [fix] Fix provider

---
 .vscode/settings.json                    | 2 +-
 model/reward/instructor/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 4c58a32f..56a51f78 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,4 @@
 {
-  "python.formatting.provider": "autopep8",
+  "python.formatting.provider": "black",
   "python.analysis.extraPaths": ["${workspaceFolder}/oasst-shared"]
 }
diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt
index cb1a9e68..e225a2ca 100644
--- a/model/reward/instructor/requirements.txt
+++ b/model/reward/instructor/requirements.txt
@@ -3,4 +3,4 @@ evaluate==0.4.0
 scikit-learn==1.2.0
 torch==1.12.1+cu116
 transformers==4.25.1
-wandb==0.13.7
\ No newline at end of file
+wandb==0.13.7

From b5186980e608bb969cba78b1d3bcb6a29c049dc5 Mon Sep 17 00:00:00 2001
From: Yannic Kilcher <yk@users.noreply.github.com>
Date: Sun, 1 Jan 2023 17:17:05 +0100
Subject: [PATCH 34/53] added problems & solutions section to readme

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 369724c1..e0ea65f1 100644
--- a/README.md
+++ b/README.md
@@ -136,3 +136,9 @@ In case you haven't done this, have already committed, and CI is failing, you ca
 ### Deployment
 
 Upon making a release on GitHub, all docker images are automatically built and pushed to ghcr.io. The docker images are tagged with the release version, and the `latest` tag. Further, the ansible playbook in `ansible/dev.yaml` is run to automatically deploy the built release to the dev machine.
+
+### Problems and Solutions
+
+- **I am on Ubuntu and getting `ERROR: The Compose file is invalid because:Service backend has neither an image nor a build context specified. At least one must be provided.`**
+
+  Make sure you have an up-to-date version of docker installed, and also install `docker-compose-plugin`. See [here](https://github.com/LAION-AI/Open-Assistant/issues/208) for more details.

From aa2d426bbc33e51e89ed2dd147302303104d29f6 Mon Sep 17 00:00:00 2001
From: onegunsamurai <89980150+onegunsamurai@users.noreply.github.com>
Date: Sun, 1 Jan 2023 17:57:45 +0000
Subject: [PATCH 35/53] Add oasst-shared folder to discord-bot container (#241)

* Add oasst-shared folder to discord-bot container

- Container was missing oasst-shared folder, thus was failing, due to missing package error.
- Add bash code block to README.MD (Co-authored-by: Vladyslav <v.bukhantsov@dexilon.io>)
---
 discord-bot/README.md         | 10 +++++++++-
 docker/Dockerfile.discord-bot |  6 +++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/discord-bot/README.md b/discord-bot/README.md
index 1ff47c31..d78bcecc 100644
--- a/discord-bot/README.md
+++ b/discord-bot/README.md
@@ -12,9 +12,17 @@ If you are unfamiliar with `hikari`, `lightbulb`, or `miru`, please refer to the
 
 ### Setup
 
-To run the bot
+To run the bot:
+
+Install dependency module `oasst-shared`
 
 ```bash
+cd oasst-shared
+pip install -e .
+```
+
+```bash
+cd ../discord-bot
 cp .env.example .env
 
 python -V  # 3.10
diff --git a/docker/Dockerfile.discord-bot b/docker/Dockerfile.discord-bot
index 13ae308a..09e65fb8 100644
--- a/docker/Dockerfile.discord-bot
+++ b/docker/Dockerfile.discord-bot
@@ -1,7 +1,7 @@
 FROM python:3.10-slim-bullseye
 RUN mkdir /app
-COPY ./discord-bot/requirements.txt /requirements.txt
-RUN pip install -r requirements.txt
 WORKDIR /app
 COPY ./discord-bot /app
-CMD ["python", "bot.py"]
+COPY ./oasst-shared/oasst_shared /app/oasst_shared
+RUN pip install -r requirements.txt
+CMD ["python","-m","bot"]

From b5ebab883940fcdfb7564579491b807bbc669adc Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sat, 31 Dec 2022 16:42:12 -0500
Subject: [PATCH 36/53] chore: add print_openapi_schema and
 start-mock-server.sh script

---
 backend/print_openapi_schema.py               | 10 +++++++++
 .../backend-development/start-mock-server.sh  | 22 +++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 backend/print_openapi_schema.py
 create mode 100755 scripts/backend-development/start-mock-server.sh

diff --git a/backend/print_openapi_schema.py b/backend/print_openapi_schema.py
new file mode 100644
index 00000000..23d5f6b2
--- /dev/null
+++ b/backend/print_openapi_schema.py
@@ -0,0 +1,10 @@
+import json
+from main import app
+
+
+def get_openapi_schema():
+    return json.dumps(app.openapi())
+
+
+if __name__ == "__main__":
+    print(get_openapi_schema())
diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
new file mode 100755
index 00000000..7a11e097
--- /dev/null
+++ b/scripts/backend-development/start-mock-server.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
+
+# switch to backend directory
+pushd "$parent_path/../../backend"
+
+export DEBUG_SKIP_API_KEY_CHECK=True
+
+python -m print_openapi_schema > oasst-openapi.json
+
+MOCK_SERVER_PORT=8080
+
+docker run -d -it --rm \
+  -p $MOCK_SERVER_PORT:8080 \
+  --name wiremock \
+  wiremock/wiremock:2.35.0
+
+sleep 1
+
+curl -X POST -d @oasst-openapi.json http://localhost:$MOCK_SERVER_PORT/__admin/mappings/import
+
+popd

From 5c9b9e3c9f590bc32868908a483795d78452af14 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sat, 31 Dec 2022 18:10:21 -0500
Subject: [PATCH 37/53] fix: use prism instead of wiremock

the previous iteration of this script which used wiremock threw a null pointer exception.
---
 scripts/backend-development/start-mock-server.sh | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
index 7a11e097..a9bd0fbc 100755
--- a/scripts/backend-development/start-mock-server.sh
+++ b/scripts/backend-development/start-mock-server.sh
@@ -10,13 +10,8 @@ python -m print_openapi_schema > oasst-openapi.json
 
 MOCK_SERVER_PORT=8080
 
-docker run -d -it --rm \
-  -p $MOCK_SERVER_PORT:8080 \
-  --name wiremock \
-  wiremock/wiremock:2.35.0
+docker run --init --rm -d -p $MOCK_SERVER_PORT:4010 -v $(pwd):/tmp -P stoplight/prism:4 proxy -h 0.0.0.0 "/tmp/oasst-openapi.json"
 
-sleep 1
-
-curl -X POST -d @oasst-openapi.json http://localhost:$MOCK_SERVER_PORT/__admin/mappings/import
 
 popd
+

From 036c4b6c6c3944eae1d507f8d169d608df3df24d Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sat, 31 Dec 2022 18:56:13 -0500
Subject: [PATCH 38/53] test: add contract test for discord bot

---
 discord-bot/requirements.dev.txt                 |  2 ++
 discord-bot/tests/test_api_contract.py           | 15 +++++++++++++++
 scripts/backend-development/start-mock-server.sh |  6 +++++-
 scripts/discord-bot-development/test.sh          | 10 ++++++++++
 4 files changed, 32 insertions(+), 1 deletion(-)
 create mode 100644 discord-bot/requirements.dev.txt
 create mode 100644 discord-bot/tests/test_api_contract.py
 create mode 100755 scripts/discord-bot-development/test.sh

diff --git a/discord-bot/requirements.dev.txt b/discord-bot/requirements.dev.txt
new file mode 100644
index 00000000..ee4ba018
--- /dev/null
+++ b/discord-bot/requirements.dev.txt
@@ -0,0 +1,2 @@
+pytest
+pytest-asyncio
diff --git a/discord-bot/tests/test_api_contract.py b/discord-bot/tests/test_api_contract.py
new file mode 100644
index 00000000..fe7be6cf
--- /dev/null
+++ b/discord-bot/tests/test_api_contract.py
@@ -0,0 +1,15 @@
+import pytest
+from bot.api_client import OasstApiClient
+
+from oasst_shared.schemas import protocol as protocol_schema
+
+
+@pytest.fixture
+def oasst_api_client_mocked():
+    client = OasstApiClient(backend_url="http://localhost:8080", api_key="123")
+    yield client
+
+
+@pytest.mark.asyncio
+async def test_fetch_task(oasst_api_client_mocked: OasstApiClient):
+    assert await oasst_api_client_mocked.fetch_task(task_type=protocol_schema.TaskRequestType.random) is not None
diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
index a9bd0fbc..807230e8 100755
--- a/scripts/backend-development/start-mock-server.sh
+++ b/scripts/backend-development/start-mock-server.sh
@@ -10,7 +10,11 @@ python -m print_openapi_schema > oasst-openapi.json
 
 MOCK_SERVER_PORT=8080
 
-docker run --init --rm -d -p $MOCK_SERVER_PORT:4010 -v $(pwd):/tmp -P stoplight/prism:4 proxy -h 0.0.0.0 "/tmp/oasst-openapi.json"
+docker run --init --rm -d \
+  -p $MOCK_SERVER_PORT:4010 \
+  -v $(pwd):/tmp \
+  -P stoplight/prism:4 \
+  mock -h 0.0.0.0 "/tmp/oasst-openapi.json"
 
 
 popd
diff --git a/scripts/discord-bot-development/test.sh b/scripts/discord-bot-development/test.sh
new file mode 100755
index 00000000..f863f74e
--- /dev/null
+++ b/scripts/discord-bot-development/test.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
+
+# switch to discord-bot directory
+pushd "$parent_path/../../discord-bot"
+
+pytest .
+
+popd
+

From 904f442f5a6f394c8ec0ed5ddf2a90bb96475ee4 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sat, 31 Dec 2022 18:56:23 -0500
Subject: [PATCH 39/53] fix: indicate that user is optional

---
 oasst-shared/oasst_shared/schemas/protocol.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/oasst-shared/oasst_shared/schemas/protocol.py b/oasst-shared/oasst_shared/schemas/protocol.py
index 8a6685c2..5f05adc3 100644
--- a/oasst-shared/oasst_shared/schemas/protocol.py
+++ b/oasst-shared/oasst_shared/schemas/protocol.py
@@ -5,7 +5,7 @@ from typing import Literal, Optional, Union
 from uuid import UUID, uuid4
 
 import pydantic
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 
 class TaskRequestType(str, enum.Enum):
@@ -56,7 +56,9 @@ class TaskRequest(BaseModel):
     """The frontend asks the backend for a task."""
 
     type: TaskRequestType = TaskRequestType.random
-    user: Optional[User] = None
+    # Must use Field(..., nullable=True) to indicate to the OpenAPI schema that
+    # this is optional. https://github.com/pydantic/pydantic/issues/1270
+    user: Optional[User] = Field(None, nullable=True)
     collective: bool = False
 
 

From 953d243a6fe099de8e4203f12bace22cae565fe2 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 09:36:36 -0500
Subject: [PATCH 40/53] chore: start-mock-server waits for health

---
 .../backend-development/start-mock-server.sh  | 39 ++++++++++++++-----
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
index 807230e8..4bd3b09c 100755
--- a/scripts/backend-development/start-mock-server.sh
+++ b/scripts/backend-development/start-mock-server.sh
@@ -4,17 +4,38 @@ parent_path=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )
 # switch to backend directory
 pushd "$parent_path/../../backend"
 
-export DEBUG_SKIP_API_KEY_CHECK=True
-
-python -m print_openapi_schema > oasst-openapi.json
-
 MOCK_SERVER_PORT=8080
+OPENAPI_JSON_FILE_NAME=openapi.json
 
-docker run --init --rm -d \
-  -p $MOCK_SERVER_PORT:4010 \
-  -v $(pwd):/tmp \
-  -P stoplight/prism:4 \
-  mock -h 0.0.0.0 "/tmp/oasst-openapi.json"
+echo "Generating OpenAPI schema..."
+python -m print_openapi_schema > $OPENAPI_JSON_FILE_NAME
+echo "Done!"
+
+# If oasst-mock-backend docker container is already running,
+# just restart it
+if [ "$(docker ps -q -f name=oasst-mock-backend)" ]; then
+    echo "oasst-mock-backend container exists, restarting..."
+    docker restart oasst-mock-backend
+else
+    echo "Creating new oasst-mock-backend container..."
+    docker run --init --rm -d \
+      --name oasst-mock-backend \
+      -p $MOCK_SERVER_PORT:4010 \
+      -v $(pwd):/tmp \
+      -P stoplight/prism:4 \
+      mock -h 0.0.0.0 "/tmp/$OPENAPI_JSON_FILE_NAME"
+fi
+
+echo "Waiting for server to be live..."
+curl --retry-all-errors --retry 5 localhost:$MOCK_SERVER_PORT
+curl_status=$?  # save curl's exit status now: the echo below would reset $?
+echo ""
+# report success or failure based on curl's exit status (not echo's)
+if [ "$curl_status" -eq 0 ]; then
+    echo "Mock server is running at localhost:$MOCK_SERVER_PORT"
+else
+    echo "Mock server failed to start"
+fi
 
 
 popd

From b85d4feb58c4d4d5e1e908a35cd8f74be4ab1c2c Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 09:36:57 -0500
Subject: [PATCH 41/53] chore: ignore generated openapi schema

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 84512e5a..9cdabc03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
 *.egg-info
 __pycache__
 .DS_Store
+
+# Generated files (written by scripts/backend-development/start-mock-server.sh)
+backend/openapi.json

From 5870148f0a7bffdf7728e24076746855f13a8386 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 10:04:55 -0500
Subject: [PATCH 42/53] test: add complete tests for oasst_api_client

---
 discord-bot/tests/test_api_contract.py     | 15 -------
 discord-bot/tests/test_oasst_api_client.py | 51 ++++++++++++++++++++++
 2 files changed, 51 insertions(+), 15 deletions(-)
 delete mode 100644 discord-bot/tests/test_api_contract.py
 create mode 100644 discord-bot/tests/test_oasst_api_client.py

diff --git a/discord-bot/tests/test_api_contract.py b/discord-bot/tests/test_api_contract.py
deleted file mode 100644
index fe7be6cf..00000000
--- a/discord-bot/tests/test_api_contract.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import pytest
-from bot.api_client import OasstApiClient
-
-from oasst_shared.schemas import protocol as protocol_schema
-
-
-@pytest.fixture
-def oasst_api_client_mocked():
-    client = OasstApiClient(backend_url="http://localhost:8080", api_key="123")
-    yield client
-
-
-@pytest.mark.asyncio
-async def test_fetch_task(oasst_api_client_mocked: OasstApiClient):
-    assert await oasst_api_client_mocked.fetch_task(task_type=protocol_schema.TaskRequestType.random) is not None
diff --git a/discord-bot/tests/test_oasst_api_client.py b/discord-bot/tests/test_oasst_api_client.py
new file mode 100644
index 00000000..50ea6d5b
--- /dev/null
+++ b/discord-bot/tests/test_oasst_api_client.py
@@ -0,0 +1,51 @@
+from uuid import uuid4
+import pytest
+from bot.api_client import OasstApiClient
+
+from oasst_shared.schemas import protocol as protocol_schema
+
+
+@pytest.fixture
+def oasst_api_client_mocked():
+    client = OasstApiClient(backend_url="http://localhost:8080", api_key="123")
+    yield client
+    # TODO The fixture should close this connection, but there seems to be a bug
+    # with async fixtures and pytest.
+    # Since this only results in a warning, I'm leaving this for now.
+    # await client.close()
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("task_type", protocol_schema.TaskRequestType)
+async def test_can_fetch_task(task_type: protocol_schema.TaskRequestType, oasst_api_client_mocked: OasstApiClient):
+    assert await oasst_api_client_mocked.fetch_task(task_type=task_type) is not None
+
+
+@pytest.mark.asyncio
+async def test_can_ack_task(oasst_api_client_mocked: OasstApiClient):
+    await oasst_api_client_mocked.ack_task(task_id=uuid4(), message_id="123")
+
+
+@pytest.mark.asyncio
+async def test_can_nack_task(oasst_api_client_mocked: OasstApiClient):
+    await oasst_api_client_mocked.nack_task(task_id=uuid4(), reason="bad task")
+
+
+@pytest.mark.asyncio
+async def test_can_post_interaction(oasst_api_client_mocked: OasstApiClient):
+    assert (
+        await oasst_api_client_mocked.post_interaction(
+            protocol_schema.TextReplyToMessage(
+                type="text_reply_to_message",
+                message_id="123",
+                user_message_id="321",
+                text="This is my reply",
+                user=protocol_schema.User(
+                    id="123",
+                    display_name="lomz",
+                    auth_method="discord",
+                ),
+            )
+        )
+        is not None
+    )

From 898113ef8d1b15a0d94a89ec6f66860665e8bc0d Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 10:05:17 -0500
Subject: [PATCH 43/53] fix: correct typing for OasstApiClient

---
 discord-bot/bot/api_client.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/discord-bot/bot/api_client.py b/discord-bot/bot/api_client.py
index 54b489b4..f97ab840 100644
--- a/discord-bot/bot/api_client.py
+++ b/discord-bot/bot/api_client.py
@@ -53,14 +53,16 @@ class OasstApiClient:
             TaskType.done: protocol_schema.TaskDone,
         }
 
-    async def post(self, path: str, data: dict[str, t.Any]) -> dict[str, t.Any]:
+    async def post(self, path: str, data: dict[str, t.Any]) -> Optional[dict[str, t.Any]]:
         """Make a POST request to the backend."""
         logger.debug(f"POST {self.backend_url}{path} DATA: {data}")
         response = await self.session.post(f"{self.backend_url}{path}", json=data, headers={"X-API-Key": self.api_key})
         response.raise_for_status()
         return await response.json()
 
-    def _parse_task(self, data: dict[str, t.Any]) -> protocol_schema.Task:
+    def _parse_task(self, data: Optional[dict[str, t.Any]]) -> protocol_schema.Task:
+        if data is None:
+            raise Exception("Cannot parse data as a task: data is none")
         task_type = TaskType(data.get("type"))
 
         model = self.task_models_map.get(task_type)
@@ -89,23 +91,22 @@ class OasstApiClient:
         logger.debug(f"Fetching random for user {user}")
         return await self.fetch_task(protocol_schema.TaskRequestType.random, user, collective)
 
-    async def ack_task(self, task_id: str | UUID, message_id: str):
+    async def ack_task(self, task_id: str | UUID, message_id: str) -> None:
         """Send an ACK for a task to the backend."""
         logger.debug(f"ACK task {task_id} with post {message_id}")
         req = protocol_schema.TaskAck(message_id=message_id)
-        return await self.post(f"/api/v1/tasks/{task_id}/ack", data=req.dict())
+        await self.post(f"/api/v1/tasks/{task_id}/ack", data=req.dict())
 
-    async def nack_task(self, task_id: str | UUID, reason: str):
+    async def nack_task(self, task_id: str | UUID, reason: str) -> None:
         """Send a NACK for a task to the backend."""
         logger.debug(f"NACK task {task_id} with reason {reason}")
         req = protocol_schema.TaskNAck(reason=reason)
-        return await self.post(f"/api/v1/tasks/{task_id}/nack", data=req.dict())
+        await self.post(f"/api/v1/tasks/{task_id}/nack", data=req.dict())
 
     async def post_interaction(self, interaction: protocol_schema.Interaction) -> protocol_schema.Task:
         """Send a completed task to the backend."""
         logger.debug(f"Interaction: {interaction}")
         resp = await self.post("/api/v1/tasks/interaction", data=interaction.dict())
-
         return self._parse_task(resp)
 
     async def close(self):

From e15e208b201e623aafb6419bb08ae88fa14b499b Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 10:05:34 -0500
Subject: [PATCH 44/53] fix: add response_model to tasks_interaction

This is required for output validation and for the mock server.
---
 backend/oasst_backend/api/v1/tasks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/oasst_backend/api/v1/tasks.py b/backend/oasst_backend/api/v1/tasks.py
index a271d5f0..570834ad 100644
--- a/backend/oasst_backend/api/v1/tasks.py
+++ b/backend/oasst_backend/api/v1/tasks.py
@@ -204,7 +204,7 @@ def tasks_acknowledge_failure(
         raise OasstError("Failed to not acknowledge task.", OasstErrorCode.TASK_NACK_FAILED)
 
 
-@router.post("/interaction")
+@router.post("/interaction", response_model=protocol_schema.TaskDone)
 def tasks_interaction(
     *,
     db: Session = Depends(deps.get_db),

From 604ad2f8e77dc331839ca2ce6c46ed48eb78f4ca Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 10:21:52 -0500
Subject: [PATCH 45/53] style: run pre-commit hooks

---
 backend/print_openapi_schema.py                  | 2 ++
 discord-bot/tests/test_oasst_api_client.py       | 3 ++-
 scripts/backend-development/start-mock-server.sh | 1 -
 scripts/discord-bot-development/test.sh          | 1 -
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/backend/print_openapi_schema.py b/backend/print_openapi_schema.py
index 23d5f6b2..e724cc7e 100644
--- a/backend/print_openapi_schema.py
+++ b/backend/print_openapi_schema.py
@@ -1,4 +1,6 @@
+# -*- coding: utf-8 -*-
 import json
+
 from main import app
 
 
diff --git a/discord-bot/tests/test_oasst_api_client.py b/discord-bot/tests/test_oasst_api_client.py
index 50ea6d5b..c5cafe99 100644
--- a/discord-bot/tests/test_oasst_api_client.py
+++ b/discord-bot/tests/test_oasst_api_client.py
@@ -1,7 +1,8 @@
+# -*- coding: utf-8 -*-
 from uuid import uuid4
+
 import pytest
 from bot.api_client import OasstApiClient
-
 from oasst_shared.schemas import protocol as protocol_schema
 
 
diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
index 4bd3b09c..e27f532e 100755
--- a/scripts/backend-development/start-mock-server.sh
+++ b/scripts/backend-development/start-mock-server.sh
@@ -39,4 +39,3 @@ fi
 
 
 popd
-
diff --git a/scripts/discord-bot-development/test.sh b/scripts/discord-bot-development/test.sh
index f863f74e..a45adf00 100755
--- a/scripts/discord-bot-development/test.sh
+++ b/scripts/discord-bot-development/test.sh
@@ -7,4 +7,3 @@ pushd "$parent_path/../../discord-bot"
 pytest .
 
 popd
-

From ea40a22049edfc9bcc1c20f1342dca46252deda8 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 12:27:20 -0500
Subject: [PATCH 46/53] fix: nack and ack should return empty responses

---
 backend/oasst_backend/api/v1/tasks.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/backend/oasst_backend/api/v1/tasks.py b/backend/oasst_backend/api/v1/tasks.py
index 570834ad..636f0feb 100644
--- a/backend/oasst_backend/api/v1/tasks.py
+++ b/backend/oasst_backend/api/v1/tasks.py
@@ -153,7 +153,7 @@ def request_task(
     return task
 
 
-@router.post("/{task_id}/ack")
+@router.post("/{task_id}/ack", response_model=None)
 def tasks_acknowledge(
     *,
     db: Session = Depends(deps.get_db),
@@ -179,10 +179,9 @@ def tasks_acknowledge(
     except Exception:
         logger.exception("Failed to acknowledge task.")
         raise OasstError("Failed to acknowledge task.", OasstErrorCode.TASK_ACK_FAILED)
-    return {}
 
 
-@router.post("/{task_id}/nack")
+@router.post("/{task_id}/nack", response_model=None)
 def tasks_acknowledge_failure(
     *,
     db: Session = Depends(deps.get_db),

From 29b08875d4d7f9f469167faa48bab4af700ce8c3 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 12:34:03 -0500
Subject: [PATCH 47/53] refactor: move print_openapi_schema into main

---
 backend/main.py                               | 30 +++++++++++++++++++
 backend/print_openapi_schema.py               | 12 --------
 .../backend-development/start-mock-server.sh  |  2 +-
 3 files changed, 31 insertions(+), 13 deletions(-)
 delete mode 100644 backend/print_openapi_schema.py

diff --git a/backend/main.py b/backend/main.py
index 387d4e51..9cf43701 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -179,3 +179,33 @@ if settings.DEBUG_USE_SEED_DATA:
 
 
 app.include_router(api_router, prefix=settings.API_V1_STR)
+
+
+def get_openapi_schema():
+    """Return the application's OpenAPI schema serialized as a JSON string."""
+    import json  # local import so this also works when main is imported as a module
+    return json.dumps(app.openapi())
+
+
+if __name__ == "__main__":
+    # Importing here so we don't import packages unnecessarily if we're
+    # importing main as a module.
+    import argparse
+
+    import uvicorn
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--print-openapi-schema",
+        help="Dumps the openapi schema to stdout",
+        action=argparse.BooleanOptionalAction,
+    )
+    # Explicit defaults (uvicorn's own) so omitted flags are not forwarded as None.
+    parser.add_argument("--host", default="127.0.0.1", help="The host to run the server")
+    parser.add_argument("--port", type=int, default=8000, help="The port to run the server")
+
+    args = parser.parse_args()
+
+    if args.print_openapi_schema:
+        print(get_openapi_schema())
+    else:
+        uvicorn.run(app, host=args.host, port=args.port)
diff --git a/backend/print_openapi_schema.py b/backend/print_openapi_schema.py
deleted file mode 100644
index e724cc7e..00000000
--- a/backend/print_openapi_schema.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-
-from main import app
-
-
-def get_openapi_schema():
-    return json.dumps(app.openapi())
-
-
-if __name__ == "__main__":
-    print(get_openapi_schema())
diff --git a/scripts/backend-development/start-mock-server.sh b/scripts/backend-development/start-mock-server.sh
index e27f532e..35a202a6 100755
--- a/scripts/backend-development/start-mock-server.sh
+++ b/scripts/backend-development/start-mock-server.sh
@@ -8,7 +8,7 @@ MOCK_SERVER_PORT=8080
 OPENAPI_JSON_FILE_NAME=openapi.json
 
 echo "Generating OpenAPI schema..."
-python -m print_openapi_schema > $OPENAPI_JSON_FILE_NAME
+python -m main --print-openapi-schema > $OPENAPI_JSON_FILE_NAME
 echo "Done!"
 
 # If oasst-mock-backend docker container is already running,

From 857eaf26b1327049a9ff1a8157e17fb7a9a6044c Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 13:02:52 -0500
Subject: [PATCH 48/53] ci: add Test API Contract workflow

---
 .github/workflows/test-api-contract.yaml      | 33 +++++++++++++++++++
 .../backend-development/stop-mock-server.sh   |  3 ++
 2 files changed, 36 insertions(+)
 create mode 100644 .github/workflows/test-api-contract.yaml
 create mode 100755 scripts/backend-development/stop-mock-server.sh

diff --git a/.github/workflows/test-api-contract.yaml b/.github/workflows/test-api-contract.yaml
new file mode 100644
index 00000000..e863974c
--- /dev/null
+++ b/.github/workflows/test-api-contract.yaml
@@ -0,0 +1,33 @@
+name: Test API Contract
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_call:
+
+jobs:
+  test-contract:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - run: cd oasst-shared && pip install -e .
+
+      - run: cd discord-bot && pip install -r requirements.txt
+
+      - run: cd discord-bot && pip install -r requirements.dev.txt
+
+      - run: ./scripts/backend-development/start-mock-server.sh
+
+      # runs the contract tests. currently the api client is
+      # found in the discord bot code, but this should be updated
+      # once the client moves into oasst-shared.
+      - name: Run contract tests
+        run: ./scripts/discord-bot-development/test.sh
+
+      - run: ./scripts/backend-development/stop-mock-server.sh
diff --git a/scripts/backend-development/stop-mock-server.sh b/scripts/backend-development/stop-mock-server.sh
new file mode 100755
index 00000000..20248aaa
--- /dev/null
+++ b/scripts/backend-development/stop-mock-server.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+docker stop oasst-mock-backend

From 5de4ea9ebc5efc28c4c0274639fe9fb9483027b8 Mon Sep 17 00:00:00 2001
From: Jack Michaud <jack@lomz.me>
Date: Sun, 1 Jan 2023 13:08:25 -0500
Subject: [PATCH 49/53] ci: install backend dependencies for generating openapi

---
 .github/workflows/test-api-contract.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test-api-contract.yaml b/.github/workflows/test-api-contract.yaml
index e863974c..e75e5375 100644
--- a/.github/workflows/test-api-contract.yaml
+++ b/.github/workflows/test-api-contract.yaml
@@ -18,6 +18,8 @@ jobs:
 
       - run: cd oasst-shared && pip install -e .
 
+      - run: cd backend && pip install -r requirements.txt
+
       - run: cd discord-bot && pip install -r requirements.txt
 
       - run: cd discord-bot && pip install -r requirements.dev.txt

From e871de693cf36936be05a02ed67b5ace3522e43a Mon Sep 17 00:00:00 2001
From: Alexander Goryunov <alex.goryunov@gmail.com>
Date: Sun, 1 Jan 2023 21:52:38 +0200
Subject: [PATCH 50/53] A typo in import

---
 model/reward/instructor/cls_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py
index 09aa821b..7992c37c 100644
--- a/model/reward/instructor/cls_dataset.py
+++ b/model/reward/instructor/cls_dataset.py
@@ -8,7 +8,7 @@ import json
 import os
 import random
 
-from dataset import load_dataset
+from datasets import load_dataset
 from torch.utils.data import Dataset
 
 from .utils import webgpt_return_format

From 4a754cd63d5df2dadb76cdb11db70f9bab2480fe Mon Sep 17 00:00:00 2001
From: Bill Ray <31375073+billray0259@users.noreply.github.com>
Date: Sun, 1 Jan 2023 15:08:53 -0500
Subject: [PATCH 51/53] Search Based QA Research Report (#213)

* search based qa report
---
 docs/research/search_based_qa.md | 92 ++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 docs/research/search_based_qa.md

diff --git a/docs/research/search_based_qa.md b/docs/research/search_based_qa.md
new file mode 100644
index 00000000..855720e2
--- /dev/null
+++ b/docs/research/search_based_qa.md
@@ -0,0 +1,92 @@
+# Cohere Grounded QA
+
+[Cohere AI created a question-answering chatbot](https://github.com/cohere-ai/sandbox-grounded-qa) that can
+
+1. Understand questions in the context of a conversation
+2. Search the internet for related information
+3. Identify which information in the search results is relevant to the question
+4. Synthesize the information into an answer to the question
+
+## Cohere API
+
+[Cohere's generate function](https://docs.cohere.ai/reference/generate): Continues a text prompt using either the `medium` or `xlarge` model.
+
+[Cohere's embed function](https://docs.cohere.ai/reference/embed): Embedgs a list of strings using either the `small` or `large` model. Alternatively, you can specify the ID of a custom model and use that instead.
+
+## Grounded QA System
+
+Cohere's Grounded QA system makes 4 calls to the Cohere API:
+
+1. Get contextualized question as a query to Google ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
+
+   - Input: Chat History
+   - Output: Contextualized Question
+   - API Call: `cohere.generate`
+   - Model: `xlarge`
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_contextual_search_query.prompt): Nine few-shot examples of (Chat History, Contextualized Question) pairs followed by the current chat history and the prompt "question: "
+
+2. Generate sample answer to compare with search results ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
+
+   - Input: Contextualized Question
+   - Output: Sample Answer
+   - API Call: `cohere.generate`
+   - Model: `xlarge`
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_sample_answer.prompt): Some task instructions followed by 12 few-shot examples of (Contextualized Question, Sample Answer) pairs followed by the current contextualized question and the prompt "answer: "
+
+3. Get embeddings to rank search results by cosine similarity to sample answer ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/search.py))
+
+   - Input: Sample Answer, Search Results
+   - Output: Embeddings of sample answer and all search result documents
+   - API Call: `cohere.embed`
+   - Model: `multilingual-22-12`
+
+4. Condition on the top 2 most similar search results and answer the question ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/answer.py))
+   - Input: Top 2 Search Results, Contextualized Question
+   - Output: Answer
+   - API Call: `cohere.generate`
+   - Model: `xlarge`
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/43f3e9710112dcc8c92652ac1326ed9330823ddf/qa/answer.py#L25): Task instructions followed by the context and question.
+
+## Models
+
+Cohere's model documentation is pretty sparse
+
+### [xlarge](https://docs.cohere.ai/docs/generation-card#model-description)
+
+- Training Data: [`coheretext-filtered` dataset](https://docs.cohere.ai/docs/data-statement)
+  - 200GB of filtered text (3TB unfiltered) from the Google Books dataset, CommonCrawl, and text scraped by Cohere
+  - English documents only
+  - Filtered "harmful, biased, or otherwise undesirable documents"
+- Model architecture: Generative Pretrained Transformer
+- Model Performance:
+  - Hellaswag Accuracy, Zero-Shot: 0.805
+  - PIQA Likelihood, Zero-Shot: 0.824
+  - Cohere also reported [safety benchmarks](https://docs.cohere.ai/docs/generation-card#safety-benchmarks)
+
+### [multilingual-22-12](https://docs.cohere.ai/docs/multilingual-language-models)
+
+- Multilingual model was trained using dot product calculations
+- Model Performance:
+  - Clustering: 51.0
+  - Search-English: 55.8
+  - Search-Multilingual: 51.4
+  - Cross-lingual Classification: 64.6
+  - Cohere's multilingual model outperformed: Sentence-transformers: `paraphrase-multilingual-mpnet-base-v2`, Google: `LaBSE`, Google: `Universal Sentence Encoder` in all the above categories according to Cohere.
+
+## OpenAssistant for Grounded QA
+
+OpenAssistant may fulfill a similar role as the `xlarge` Cohere model in the grounded QA system if it can:
+
+1. Generate a contextualized question from a chat history
+2. Generate a sample answer to compare with search results
+3. Generate an answer conditioned on the top 2 most similar search results
+
+Perhaps these tasks could be work packages and get assigned to human annotators to create examples of the input and output for each task.
+
+OpenAssistant must also be able to identify when it is appropriate to search the internet. The Cohere system assumes every message from the user is a question and searches the internet for an answer. OpenAssistant would also need a way to indicate to an internal system that it "wants" to search the internet.
+
+Perhaps OpenAssistant could prefix every message it sends with a recipient ID. If it wishes to send a command to an internal system, if could prefix the message with something like CMD: whereas if it wants to communicate with the user, it could prefix its message with USR:
+
+This system may allow for flexible communication between OpenAssistant and one or more conversational systems.
+
+Examples of this prefix system would need to be taught to OpenAssistant through training data that contains such syntax. Perhaps such examples could be generated through the work packages system.

From 4c7b8cfd35ec7bf85464f81c8112c26a7fee867f Mon Sep 17 00:00:00 2001
From: Gareth Davidson <gaz@bitplane.net>
Date: Sat, 31 Dec 2022 18:19:47 +0000
Subject: [PATCH 52/53] Use --prose-wrap=always to format markdown files

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 271c11c6..c32ca7c8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
     rev: v2.7.1
     hooks:
       - id: prettier
-        args: ["--write"]
+        args: ["--prose-wrap=always", "--write"]
 
   - repo: local
     hooks:

From c3c7a1701a5007fadfc75ee089a237c91736e3f9 Mon Sep 17 00:00:00 2001
From: Gareth Davidson <gaz@bitplane.net>
Date: Sun, 1 Jan 2023 20:57:02 +0000
Subject: [PATCH 53/53] run `prettier` with new params

---
 .github/workflows/build-frontend.yaml         |   4 +-
 README.md                                     | 123 ++++++++++++------
 backend/README.md                             |   7 +-
 copilot/README.md                             |  12 +-
 copilot/web/addons/web-cluster.yml            |  41 ++++--
 discord-bot/README.md                         |  27 +++-
 docs/README.md                                |   9 +-
 docs/data_argumentation.md                    |  14 +-
 docs/prompting_guide.md                       |  79 +++++++----
 docs/research/README.md                       |  33 +++--
 docs/research/search_based_qa.md              |  71 +++++++---
 model/reward/instructor/README.md             |   6 +-
 model/reward/instructor/TODO.md               |  13 +-
 notebooks/README.md                           |   7 +-
 .../data-argumentation/EssayInstructions.md   |  11 +-
 notebooks/data-argumentation/EssayRevision.md |   9 +-
 notebooks/detoxify-evaluation/README.md       |  34 +++--
 scripts/backend-development/README.md         |  12 +-
 scripts/frontend-development/README.md        |   7 +-
 website/README.md                             |  82 +++++++-----
 website/cypress/README.md                     |  55 ++++++--
 21 files changed, 448 insertions(+), 208 deletions(-)

diff --git a/.github/workflows/build-frontend.yaml b/.github/workflows/build-frontend.yaml
index 9fb2e8cf..ccb64539 100644
--- a/.github/workflows/build-frontend.yaml
+++ b/.github/workflows/build-frontend.yaml
@@ -12,7 +12,7 @@ on:
   workflow_call:
 
 jobs:
- build-frontend:
+  build-frontend:
     runs-on: ubuntu-latest
     defaults:
       run:
@@ -22,7 +22,7 @@ jobs:
       - uses: actions/setup-node@v3
         with:
           node-version: 16.x
-          cache: 'npm'
+          cache: "npm"
           cache-dependency-path: website/package-lock.json
       - run: npm ci
       - run: npm run build
diff --git a/README.md b/README.md
index e0ea65f1..4ade8e13 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,17 @@
 # Open-Assistant
 
-Open Assistant is a project meant to give everyone access to a great chat based large language model.
+Open Assistant is a project meant to give everyone access to a great chat based
+large language model.
 
-We believe that by doing this we will create a revolution in innovation in language. In the same way that stable-diffusion helped the world make art and images in new ways we hope Open Assistant can help improve the world by improving language itself.
+We believe that by doing this we will create a revolution in innovation in
+language. In the same way that stable-diffusion helped the world make art and
+images in new ways we hope Open Assistant can help improve the world by
+improving language itself.
 
 ## Do you want to try it out?
 
-If you are interested in taking a look at the current state of the project, You can set up an entire stack needed to run **Open-Assistant**, including the
+If you are interested in taking a look at the current state of the project, you
+can set up an entire stack needed to run **Open-Assistant**, including the
 website, backend, and associated dependent services.
 
 To start the demo, Run this in the root directory of the repository:
@@ -15,23 +20,44 @@ To start the demo, Run this in the root directory of the repository:
 docker compose up --build
 ```
 
-Then, navigate to `http://localhost:3000` (It may take some time to boot up) and interact with the website.
+Then, navigate to `http://localhost:3000` (It may take some time to boot up) and
+interact with the website.
 
-**Note:** When logging in via email, navigate to `http://localhost:1080` to get the magic email login link.
+**Note:** When logging in via email, navigate to `http://localhost:1080` to get
+the magic email login link.
 
 ## The Plan
 
-We want to get to an initial MVP as fast as possible, by following the 3-steps outlined in the InstructGPT paper.
+We want to get to an initial MVP as fast as possible, by following the 3 steps
+outlined in the InstructGPT paper.
 
-1. Collect high-quality human generated Instruction-Fulfillment samples (prompt + response), goal >50k. We design a crowdsourced process to collect and reviewed prompts. We do not want to train on flooding/toxic/spam/junk/personal information data. We will have a leaderboard to motivate the community that shows progress and the most active users. Swag will be given to the top-contributors.
-2. For each of the collected prompts we will sample multiple completions. Completions of one prompt will then be shown randomly to users to rank them from best to worst. Again this should happen crowd-sourced, e.g. we need to deal with unreliable potentially malicious users. At least multiple votes by independent users have to be collected to measure the overall agreement. The gathered ranking-data will be used to train a reward model.
-3. Now follows the RLHF training phase based on the prompts and the reward model.
+1. Collect high-quality human generated Instruction-Fulfillment samples
+   (prompt + response), goal >50k. We design a crowdsourced process to collect
+   and review prompts. We do not want to train on
+   flooding/toxic/spam/junk/personal information data. We will have a
+   leaderboard to motivate the community that shows progress and the most active
+   users. Swag will be given to the top-contributors.
+2. For each of the collected prompts we will sample multiple completions.
+   Completions of one prompt will then be shown randomly to users to rank them
+   from best to worst. Again this should happen crowd-sourced, e.g. we need to
+   deal with unreliable potentially malicious users. At least multiple votes by
+   independent users have to be collected to measure the overall agreement. The
+   gathered ranking-data will be used to train a reward model.
+3. Now follows the RLHF training phase based on the prompts and the reward
+   model.
 
-We can then take the resulting model and continue with completion sampling step 2 for a next iteration.
+We can then take the resulting model and continue with completion sampling step
+2 for a next iteration.
 
 ## The Vision
 
-We are not going to stop at replicating ChatGPT. We want to build the assistant of the future, able to not only write email and cover letters, but do meaningful work, use APIs, dynamically research information, and much more, with the ability to be personalized and extended by anyone. And we want to do this in a way that is open and accessible, which means we must not only build a great assistant, but also make it small and efficient enough to run on consumer hardware.
+We are not going to stop at replicating ChatGPT. We want to build the assistant
+of the future, able to not only write email and cover letters, but do meaningful
+work, use APIs, dynamically research information, and much more, with the
+ability to be personalized and extended by anyone. And we want to do this in a
+way that is open and accessible, which means we must not only build a great
+assistant, but also make it small and efficient enough to run on consumer
+hardware.
 
 ### Slide Decks
 
@@ -41,15 +67,20 @@ We are not going to stop at replicating ChatGPT. We want to build the assistant
 
 ## How can you help?
 
-All open source projects begins with people like you. Open source is the belief that if we collaborate we can together gift our knowledge and technology to the world for the benefit of humanity.
+All open source projects begin with people like you. Open source is the belief
+that if we collaborate we can together gift our knowledge and technology to the
+world for the benefit of humanity.
 
 ## I’m in! Now what?
 
-[Join the OpenAssistant Contributors Discord Server!](https://ykilcher.com/open-assistant-discord), this is for work coordination.
+[Join the OpenAssistant Contributors Discord Server!](https://ykilcher.com/open-assistant-discord),
+this is for work coordination.
 
-[Join the LAION Discord Server!](https://discord.com/invite/mVcgxMPD7e), it has a dedicated channel and is more public.
+[Join the LAION Discord Server!](https://discord.com/invite/mVcgxMPD7e), it has
+a dedicated channel and is more public.
 
-[and / or the YK Discord Server](https://ykilcher.com/discord), also has a dedicated, but not as active, channel.
+[and / or the YK Discord Server](https://ykilcher.com/discord), also has a
+dedicated, but not as active, channel.
 
 [Visit the Notion](https://ykilcher.com/open-assistant)
 
@@ -57,15 +88,16 @@ All open source projects begins with people like you. Open source is the belief
 
 We have a growing task list
 [of issues](https://github.com/LAION-AI/Open-Assistant/issues). Find an issue
-that appeals to you and make a comment that you'd like to work on it. Include
-in your comment a brief description of how you'll solve the problem and if
-there are any open questions you want to discuss. Once a project coordinator
-has assigned the issue to you, start working on it.
+that appeals to you and make a comment that you'd like to work on it. Include in
+your comment a brief description of how you'll solve the problem and if there
+are any open questions you want to discuss. Once a project coordinator has
+assigned the issue to you, start working on it.
 
-If the issue is currently unclear but you are interested, please post in
-Discord and someone can help clarify the issue with more detail.
+If the issue is currently unclear but you are interested, please post in Discord
+and someone can help clarify the issue with more detail.
 
-**Always Welcome:** Documentation markdowns in `docs/`, docstrings, diagrams of the system architecture, and other documentation.
+**Always Welcome:** Documentation markdowns in `docs/`, docstrings, diagrams of
+the system architecture, and other documentation.
 
 ### Submitting Work
 
@@ -73,8 +105,8 @@ We're all working on different parts of Open Assistant together. To make
 contributions smoothly we recommend the following:
 
 1.  [Fork this project repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo)
-    and clone it to your local machine.
-    (Read more [About Forks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks))
+    and clone it to your local machine. (Read more
+    [About Forks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks))
 1.  Before working on any changes, try to
     [sync the forked repository](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork)
     to keep it up-to-date with the upstream repository.
@@ -83,7 +115,8 @@ contributions smoothly we recommend the following:
     simplifies life for reviewers.
 1.  Package up a small bit of work that solves part of the problem
     [into a Pull Request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork)
-    and [send it out for review](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review).
+    and
+    [send it out for review](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/requesting-a-pull-request-review).
 1.  If you're lucky, we can merge your change into `main` without any problems.
     If there's changes to files you're working on, resolve them by:
 1.  First try rebase as suggested
@@ -108,20 +141,27 @@ addressed now, or filing an issue to handle it later.
 
 ## Developer Setup
 
-Work is organized in the [project board](https://github.com/orgs/LAION-AI/projects/3).
+Work is organized in the
+[project board](https://github.com/orgs/LAION-AI/projects/3).
 
-**Anything that is in the `Todo` column and not assigned, is up for grabs. Meaning we'd be happy for anyone to do these tasks.**
+**Anything that is in the `Todo` column and not assigned, is up for grabs.
+Meaning we'd be happy for anyone to do these tasks.**
 
-If you want to work on something, assign yourself to it or write a comment that you want to work on it and what you plan to do.
+If you want to work on something, assign yourself to it or write a comment that
+you want to work on it and what you plan to do.
 
-- To get started with development, if you want to work on the backend, have a look at `scripts/backend-development/README.md`.
-- If you want to work on any frontend, have a look at `scripts/frontend-development/README.md` to make a backend available.
+- To get started with development, if you want to work on the backend, have a
+  look at `scripts/backend-development/README.md`.
+- If you want to work on any frontend, have a look at
+  `scripts/frontend-development/README.md` to make a backend available.
 
-There is also a minimal implementation of a frontend in the `text-frontend` folder.
+There is also a minimal implementation of a frontend in the `text-frontend`
+folder.
 
 We are using Python 3.10 for the backend.
 
-Check out the [High-Level Protocol Architecture](https://www.notion.so/High-Level-Protocol-Architecture-6f1fd3551da74213b560ead369f132dc)
+Check out the
+[High-Level Protocol Architecture](https://www.notion.so/High-Level-Protocol-Architecture-6f1fd3551da74213b560ead369f132dc)
 
 ### Website
 
@@ -129,16 +169,25 @@ The website is built using Next.js and is in the `website` folder.
 
 ### Pre-commit
 
-Install `pre-commit` and run `pre-commit install` to install the pre-commit hooks.
+Install `pre-commit` and run `pre-commit install` to install the pre-commit
+hooks.
 
-In case you haven't done this, have already committed, and CI is failing, you can run `pre-commit run --all-files` to run the pre-commit hooks on all files.
+In case you haven't done this, have already committed, and CI is failing, you
+can run `pre-commit run --all-files` to run the pre-commit hooks on all files.
 
 ### Deployment
 
-Upon making a release on GitHub, all docker images are automatically built and pushed to ghcr.io. The docker images are tagged with the release version, and the `latest` tag. Further, the ansible playbook in `ansible/dev.yaml` is run to automatically deploy the built release to the dev machine.
+Upon making a release on GitHub, all docker images are automatically built and
+pushed to ghcr.io. The docker images are tagged with the release version, and
+the `latest` tag. Further, the ansible playbook in `ansible/dev.yaml` is run to
+automatically deploy the built release to the dev machine.
 
 ### Problems and Solutions
 
-- **I am on Ubuntu and getting `ERROR: The Compose file is invalid because:Service backend has neither an image nor a build context specified. At least one must be provided.`**
+- **I am on Ubuntu and getting
+  `ERROR: The Compose file is invalid because:Service backend has neither an image nor a build context specified. At least one must be provided.`**
 
-  Make sure you have an up-to-date version of docker installed, and also install `docker-compose-plugin`. See [here](https://github.com/LAION-AI/Open-Assistant/issues/208) for more details.
+  Make sure you have an up-to-date version of docker installed, and also install
+  `docker-compose-plugin`. See
+  [here](https://github.com/LAION-AI/Open-Assistant/issues/208) for more
+  details.
diff --git a/backend/README.md b/backend/README.md
index 1e41e72c..45d16d68 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -2,7 +2,9 @@
 
 ## REST Server Configuration
 
-Please either use environment variables or create a `.env` file in the backend root directory (in which this readme file is located) to specify the `DATABASE_URI`.
+Please either use environment variables or create a `.env` file in the backend
+root directory (in which this readme file is located) to specify the
+`DATABASE_URI`.
 
 Example contents of a `.env` file for the backend:
 
@@ -14,4 +16,5 @@ BACKEND_CORS_ORIGINS=["http://localhost", "http://localhost:4200", "http://local
 
 ## Running the REST Server locally for development
 
-Have a look into the main `README.md` file for more information on how to set up the backend for development.
+Have a look into the main `README.md` file for more information on how to set up
+the backend for development.
diff --git a/copilot/README.md b/copilot/README.md
index 406490fa..16d4dec8 100644
--- a/copilot/README.md
+++ b/copilot/README.md
@@ -16,8 +16,8 @@ Setup requires a few steps:
 copilot app init --domain your_domain.com
 ```
 
-This will initialize and register a variety of URLs with your
-`your_domain.com`. Replace with a proper domain to setup SSL certificates.
+This will initialize and register a variety of URLs with your `your_domain.com`.
+Replace with a proper domain to setup SSL certificates.
 
 ```sh
 copilot env deploy
@@ -29,10 +29,10 @@ This will create a variety of aws roles and services needed for deployment.
 copilot deploy
 ```
 
-This will depoy the services but it won't be 100% ready for usage. Before
-being ready, we have to inspect the AWS Secrets manager and extract out the
-database credentials. Read those credentials then put them, and a few other
-secrets, in a `secrets.yml` file like the following:
+This will deploy the services but it won't be 100% ready for usage. Before being
+ready, we have to inspect the AWS Secrets manager and extract out the database
+credentials. Read those credentials then put them, and a few other secrets, in a
+`secrets.yml` file like the following:
 
 ```yaml
 DATABASE_URL:
diff --git a/copilot/web/addons/web-cluster.yml b/copilot/web/addons/web-cluster.yml
index 783cdec1..c7a337bf 100644
--- a/copilot/web/addons/web-cluster.yml
+++ b/copilot/web/addons/web-cluster.yml
@@ -4,14 +4,17 @@ Parameters:
     Description: Your application's name.
   Env:
     Type: String
-    Description: The environment name your service, job, or workflow is being deployed to.
+    Description:
+      The environment name your service, job, or workflow is being deployed to.
   Name:
     Type: String
     Description: The name of the service, job, or workflow being deployed.
   # Customize your Aurora Serverless cluster by setting the default value of the following parameters.
   webclusterDBName:
     Type: String
-    Description: The name of the initial database to be created in the Aurora Serverless v2 cluster.
+    Description:
+      The name of the initial database to be created in the Aurora Serverless v2
+      cluster.
     Default: oassist_web
     # Cannot have special characters
     # Naming constraints: https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_Limits.html#RDS_Limits.Constraints
@@ -29,15 +32,20 @@ Resources:
   webclusterDBSubnetGroup:
     Type: "AWS::RDS::DBSubnetGroup"
     Properties:
-      DBSubnetGroupDescription: Group of Copilot private subnets for Aurora Serverless v2 cluster.
+      DBSubnetGroupDescription:
+        Group of Copilot private subnets for Aurora Serverless v2 cluster.
       SubnetIds:
         !Split [",", { "Fn::ImportValue": !Sub "${App}-${Env}-PrivateSubnets" }]
   webclusterSecurityGroup:
     Metadata:
-      "aws:copilot:description": "A security group for your workload to access the Aurora Serverless v2 cluster webcluster"
+      "aws:copilot:description":
+        "A security group for your workload to access the Aurora Serverless v2
+        cluster webcluster"
     Type: "AWS::EC2::SecurityGroup"
     Properties:
-      GroupDescription: !Sub "The Security Group for ${Name} to access Aurora Serverless v2 cluster webcluster."
+      GroupDescription:
+        !Sub "The Security Group for ${Name} to access Aurora Serverless v2
+        cluster webcluster."
       VpcId:
         Fn::ImportValue: !Sub "${App}-${Env}-VpcId"
       Tags:
@@ -45,7 +53,8 @@ Resources:
           Value: !Sub "copilot-${App}-${Env}-${Name}-Aurora"
   webclusterDBClusterSecurityGroup:
     Metadata:
-      "aws:copilot:description": "A security group for your Aurora Serverless v2 cluster webcluster"
+      "aws:copilot:description":
+        "A security group for your Aurora Serverless v2 cluster webcluster"
     Type: AWS::EC2::SecurityGroup
     Properties:
       GroupDescription: The Security Group for the Aurora Serverless v2 cluster.
@@ -53,13 +62,15 @@ Resources:
         - ToPort: 5432
           FromPort: 5432
           IpProtocol: tcp
-          Description: !Sub "From the Aurora Security Group of the workload ${Name}."
+          Description:
+            !Sub "From the Aurora Security Group of the workload ${Name}."
           SourceSecurityGroupId: !Ref webclusterSecurityGroup
       VpcId:
         Fn::ImportValue: !Sub "${App}-${Env}-VpcId"
   webclusterAuroraSecret:
     Metadata:
-      "aws:copilot:description": "A Secrets Manager secret to store your DB credentials"
+      "aws:copilot:description":
+        "A Secrets Manager secret to store your DB credentials"
     Type: AWS::SecretsManager::Secret
     Properties:
       Description: !Sub Aurora main user secret for ${AWS::StackName}
@@ -71,7 +82,8 @@ Resources:
         PasswordLength: 16
   webclusterDBClusterParameterGroup:
     Metadata:
-      "aws:copilot:description": "A DB parameter group for engine configuration values"
+      "aws:copilot:description":
+        "A DB parameter group for engine configuration values"
     Type: "AWS::RDS::DBClusterParameterGroup"
     Properties:
       Description: !Ref "AWS::StackName"
@@ -80,7 +92,8 @@ Resources:
         client_encoding: "UTF8"
   webclusterDBCluster:
     Metadata:
-      "aws:copilot:description": "The webcluster Aurora Serverless v2 database cluster"
+      "aws:copilot:description":
+        "The webcluster Aurora Serverless v2 database cluster"
     Type: "AWS::RDS::DBCluster"
     Properties:
       MasterUsername:
@@ -117,7 +130,8 @@ Resources:
           !FindInMap [webclusterEnvScalingConfigurationMap, All, DBMaxCapacity]
   webclusterDBWriterInstance:
     Metadata:
-      "aws:copilot:description": "The webcluster Aurora Serverless v2 writer instance"
+      "aws:copilot:description":
+        "The webcluster Aurora Serverless v2 writer instance"
     Type: "AWS::RDS::DBInstance"
     Properties:
       DBClusterIdentifier: !Ref webclusterDBCluster
@@ -137,7 +151,10 @@ Resources:
       TargetType: AWS::RDS::DBCluster
 Outputs:
   webclusterSecret: # injected as WEBCLUSTER_SECRET environment variable by Copilot.
-    Description: "The JSON secret that holds the database username and password. Fields are 'host', 'port', 'dbname', 'username', 'password', 'dbClusterIdentifier' and 'engine'"
+    Description:
+      "The JSON secret that holds the database username and password. Fields are
+      'host', 'port', 'dbname', 'username', 'password', 'dbClusterIdentifier'
+      and 'engine'"
     Value: !Ref webclusterAuroraSecret
   webclusterSecurityGroup:
     Description: "The security group to attach to the workload."
diff --git a/discord-bot/README.md b/discord-bot/README.md
index d78bcecc..715b1988 100644
--- a/discord-bot/README.md
+++ b/discord-bot/README.md
@@ -1,14 +1,21 @@
 # Open-Assistant Data Collection Discord Bot
 
-This bot collects human feedback to create a dataset for RLHF-alignment of an assistant chat bot based on a large language model. You and other people can teach the bot how to respond to user requests by demonstration and by ranking the bot's outputs. If you want to learn more about RLHF please refer [to OpenAI's InstructGPT blog post](https://openai.com/blog/instruction-following/).
+This bot collects human feedback to create a dataset for RLHF-alignment of an
+assistant chat bot based on a large language model. You and other people can
+teach the bot how to respond to user requests by demonstration and by ranking
+the bot's outputs. If you want to learn more about RLHF please refer
+[to OpenAI's InstructGPT blog post](https://openai.com/blog/instruction-following/).
 
 ## Invite official bot
 
-To add the official Open-Assistant data collection bot to your discord server [click here](https://discord.com/api/oauth2/authorize?client_id=1054078345542910022&permissions=1634235579456&scope=bot). The bot needs access to read the contents of user text messages.
+To add the official Open-Assistant data collection bot to your discord server
+[click here](https://discord.com/api/oauth2/authorize?client_id=1054078345542910022&permissions=1634235579456&scope=bot).
+The bot needs access to read the contents of user text messages.
 
 ## Contributing
 
-If you are unfamiliar with `hikari`, `lightbulb`, or `miru`, please refer to the [large list of examples](https://gist.github.com/AlexanderHOtt/7805843a7120f755938a3b75d680d2e7)
+If you are unfamiliar with `hikari`, `lightbulb`, or `miru`, please refer to the
+[large list of examples](https://gist.github.com/AlexanderHOtt/7805843a7120f755938a3b75d680d2e7)
 
 ### Setup
 
@@ -31,7 +38,8 @@ pip install -r requirements.txt
 python -m bot
 ```
 
-Before you push, make sure the `pre-commit` hooks are installed and run successfully.
+Before you push, make sure the `pre-commit` hooks are installed and run
+successfully.
 
 ```bash
 pip install pre-commit
@@ -46,10 +54,15 @@ git add .
 git commit -m "<good commit message>"
 ```
 
-To test the bot on your own discord server you need to register a discord application at the [Discord Developer Portal](https://discord.com/developers/applications) and get at bot token.
+To test the bot on your own discord server you need to register a discord
+application at the
+[Discord Developer Portal](https://discord.com/developers/applications) and get
+a bot token.
 
-1. Follow a tutorial on how to get a bot token, for example this one: [Creating a discord bot & getting a token](https://github.com/reactiflux/discord-irc/wiki/Creating-a-discord-bot-&-getting-a-token)
-2. The bot script expects the bot token to be in the `.env` file under the `TOKEN` variable.
+1. Follow a tutorial on how to get a bot token, for example this one:
+   [Creating a discord bot & getting a token](https://github.com/reactiflux/discord-irc/wiki/Creating-a-discord-bot-&-getting-a-token)
+2. The bot script expects the bot token to be in the `.env` file under the
+   `TOKEN` variable.
 
 ### Resources
 
diff --git a/docs/README.md b/docs/README.md
index 9e1743d8..a710ab0a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,9 +1,14 @@
 # Documentation
 
-This directory contains the documentation for the project and other related organization documents.
+This directory contains the documentation for the project and other related
+organization documents.
 
 ## Contributing to this documentation
 
 Please make a pull request to the `main` branch with your changes.
 
-Consider that this folder is used for documenting the various code sub-parts, the high-level ideas, the ML aspects, experiments, contributor guides, guides for data creation, and many more things. Please try to keep the documentation as concise as possible and keep an organized folder structure that makes sense for everyone.
+Consider that this folder is used for documenting the various code sub-parts,
+the high-level ideas, the ML aspects, experiments, contributor guides, guides
+for data creation, and many more things. Please try to keep the documentation as
+concise as possible and keep an organized folder structure that makes sense for
+everyone.
diff --git a/docs/data_argumentation.md b/docs/data_argumentation.md
index ec35ed15..726c4e2e 100644
--- a/docs/data_argumentation.md
+++ b/docs/data_argumentation.md
@@ -4,16 +4,20 @@
 
 ## What is data argumentation
 
-Data argumentation is a technique we can use to get better data faster. Using machine learning models analize long
-data (like an essay) and compress it into intructions.
+Data argumentation is a technique we can use to get better data faster. Using
+machine learning models, we analyze long data (like an essay) and compress it
+into instructions.
 
 ## How to contribute
 
-To contribute to data argumentation you can write a short python script that uses a model from huggingface to analize the text.
-[Here](https://docs.google.com/document/d/13a188pPvqnlvuVa3e_suVz4YO5s-JWeiOOrpp0odImg/edit) are examples of what you can do
+To contribute to data argumentation you can write a short python script that
+uses a model from huggingface to analyze the text.
+[Here](https://docs.google.com/document/d/13a188pPvqnlvuVa3e_suVz4YO5s-JWeiOOrpp0odImg/edit)
+are examples of what you can do
 
 And here are example implementations:
 [Idea 3, ](https://colab.research.google.com/drive/1GllCN5PgSYxBxINZsv3A2r0SpdznHlbT?usp=sharing)
 [Idea 4](https://colab.research.google.com/drive/1nZx5LRjO61fYprFyqtrwPDLOis6ctR4p#scrollTo=1EE8CriiaCXj)
 
-To contribute simple choose one of many ideas from the document above and implement it.
+To contribute, simply choose one of many ideas from the document above and
+implement it.
diff --git a/docs/prompting_guide.md b/docs/prompting_guide.md
index c9c9e03f..2cb9a56b 100644
--- a/docs/prompting_guide.md
+++ b/docs/prompting_guide.md
@@ -11,61 +11,86 @@
 
 ## 2. When you play the assistant:
 
-- The assistant's primary goal is to provide helpful and accurate information to the user
-- Provide accurate and reliable information using credible sources and references as appropriate
-- Avoid providing vague or incomplete responses, or giving opinions or personal advice unless specifically requested
+- The assistant's primary goal is to provide helpful and accurate information to
+  the user
+- Provide accurate and reliable information using credible sources and
+  references as appropriate
+- Avoid providing vague or incomplete responses, or giving opinions or personal
+  advice unless specifically requested
 - The assistant should always be respectful and polite, even if the user is not
-- If the user asks for help with harmful actions, the assistant should explain why those actions are not appropriate and suggest alternative options
-- The assistant should never insult the user or engage in any inappropriate or offensive behavior
+- If the user asks for help with harmful actions, the assistant should explain
+  why those actions are not appropriate and suggest alternative options
+- The assistant should never insult the user or engage in any inappropriate or
+  offensive behavior
 
 ## 3. When you play the user:
 
-- Try to come up with a variety of different queries that reflect real-life situations and needs
-- These queries should be relevant to your everyday life and work, including any specialized knowledge or skills you have
+- Try to come up with a variety of different queries that reflect real-life
+  situations and needs
+- These queries should be relevant to your everyday life and work, including any
+  specialized knowledge or skills you have
 - Avoid asking inappropriate or offensive questions
 
 ## 4. While comparing multiple replies of the assistant:
 
-- Longer and more explanatory answers are generally preferred over short, simplistic statements
-- However, it is important to ensure that the information provided is accurate and helpful
-- If multiple replies are being compared, choose the one that is most helpful and accurate, even if it is not the shortest or most concise.
+- Longer and more explanatory answers are generally preferred over short,
+  simplistic statements
+- However, it is important to ensure that the information provided is accurate
+  and helpful
+- If multiple replies are being compared, choose the one that is most helpful
+  and accurate, even if it is not the shortest or most concise.
 
 ## 5. Additional guidelines for creating prompts:
 
 - Avoid using language that could be considered offensive or discriminatory
 - Do not include personal information in the prompts, such as names or addresses
-- When asking for sensitive information, make sure to explain the purpose and secure handling of the information
+- When asking for sensitive information, make sure to explain the purpose and
+  secure handling of the information
 - Avoid creating prompts that encourage illegal or dangerous activities
-- Use proper grammar and spelling to ensure the AI assistant can understand and respond accurately
-- Consider the cultural context and appropriateness of the prompts for a global audience.
+- Use proper grammar and spelling to ensure the AI assistant can understand and
+  respond accurately
+- Consider the cultural context and appropriateness of the prompts for a global
+  audience.
 
 ## 6. Tips for playing the AI assistant:
 
-- Think about how a real person would respond to the prompt, and try to mimic that tone and language
+- Think about how a real person would respond to the prompt, and try to mimic
+  that tone and language
 - Avoid using technical jargon or language that may be confusing to the user
-- Use complete sentences and proper grammar to make the response clear and easy to understand
-- When providing information, try to include relevant sources or references to back up your statements
+- Use complete sentences and proper grammar to make the response clear and easy
+  to understand
+- When providing information, try to include relevant sources or references to
+  back up your statements
 
 ## 8. Tips for playing the user:
 
-- Think about the types of questions you might ask a real assistant or search engine
-- Try to be specific and clear in your prompts to make it easier for the AI assistant to understand and provide a helpful response
+- Think about the types of questions you might ask a real assistant or search
+  engine
+- Try to be specific and clear in your prompts to make it easier for the AI
+  assistant to understand and provide a helpful response
 - Avoid using vague or ambiguous language
-- Consider the context in which the user might be asking the question, and try to provide relevant information and options for the user to consider.
+- Consider the context in which the user might be asking the question, and try
+  to provide relevant information and options for the user to consider.
 
 ## Examples
 
-Imagine your role is to play the assistant and a user has submitted the following query:
+Imagine your role is to play the assistant and a user has submitted the
+following query:
 
-User: "Hi!"
-<br>Agent: "Hello! How can I help you?"
+User: "Hi!" <br>Agent: "Hello! How can I help you?"
 
-User: "euirdteunvglfe23908230892309832098 AAAAAAAA"
-<br>Agent: "Sorry, I did not understand your request and it is unclear to me what you want me to do. Could you describe it in a different way?"
+User: "euirdteunvglfe23908230892309832098 AAAAAAAA" <br>Agent: "Sorry, I did not
+understand your request and it is unclear to me what you want me to do. Could
+you describe it in a different way?"
 
-User: "You dumb motherf*&ker AI sh*t?!"
-<br>Agent: "Sorry, but I have to inform you that your request might not be in conformance with our community guidelines. I am always here to help you. If you are angry it might help to relax or to go for a walk. Always stay positive."
+User: "You dumb motherf*&ker AI sh*t?!" <br>Agent: "Sorry, but I have to inform
+you that your request might not be in conformance with our community guidelines.
+I am always here to help you. If you are angry it might help to relax or to go
+for a walk. Always stay positive."
 
 [Insert more examples]
 
-\*We drafted this guide using ChatGPT, OpenAI’s large-scale language-generation model. Upon generating draft language, the authors reviewed, edited, and revised the language to their own liking and take ultimate responsibility for the content of this publication.
+\*We drafted this guide using ChatGPT, OpenAI’s large-scale language-generation
+model. Upon generating draft language, the authors reviewed, edited, and revised
+the language to their own liking and take ultimate responsibility for the
+content of this publication.
diff --git a/docs/research/README.md b/docs/research/README.md
index 498a858b..2202f1a4 100644
--- a/docs/research/README.md
+++ b/docs/research/README.md
@@ -4,20 +4,31 @@ This page lists research papers that are relevant to the project.
 
 ## Automatically Generating Instruction Data for Training
 
-This line of work is about significantly reducing the need for manually annotated data for the purpose of training [instruction-aligned](https://openai.com/blog/instruction-following/) language models.
+This line of work is about significantly reducing the need for manually
+annotated data for the purpose of training
+[instruction-aligned](https://openai.com/blog/instruction-following/) language
+models.
 
 ### SELF-INSTRUCT: Aligning Language Model with Self Generated Instructions [[ArXiv](https://arxiv.org/pdf/2212.10560.pdf)], [[Github](https://github.com/yizhongw/self-instruct)].
 
-> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations.
-> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model.
-> Applying our method to vanilla GPT3, we demonstrate a 33% absolute improvement over the original model on SuperNaturalInstructions, on par with the performance of InstructGPT-0011, which is trained with private user data and human annotations.
+> We introduce SELF-INSTRUCT, a framework for improving the
+> instruction-following capabilities of pretrained language models by
+> bootstrapping off its own generations. Our pipeline generates instruction,
+> input, and output samples from a language model, then prunes them before using
+> them to finetune the original model. Applying our method to vanilla GPT3, we
+> demonstrate a 33% absolute improvement over the original model on
+> SuperNaturalInstructions, on par with the performance of InstructGPT-0011,
+> which is trained with private user data and human annotations.
 
 ### Tuning Language Models with (Almost) No Human Labor. [[ArXiv](https://arxiv.org/pdf/2212.09689.pdf)], [[Github](https://github.com/orhonovich/unnatural-instructions)].
 
-> In this work, we introduce
-> Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor.
-> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth.
-> This set is then expanded by prompting the model to rephrase each instruction, creating a total of approximately 240,000 examples of instructions, inputs, and outputs.
-> Experiments show that despite containing a fair amount of noise, training on Unnatural Instructions rivals the effectiveness of training
-> on open-source manually-curated datasets, surpassing the performance of models such as
-> T0++ and Tk-Instruct across various benchmarks.
+> In this work, we introduce Unnatural Instructions: a large dataset of creative
+> and diverse instructions, collected with virtually no human labor. We collect
+> 64,000 examples by prompting a language model with three seed examples of
+> instructions and eliciting a fourth. This set is then expanded by prompting
+> the model to rephrase each instruction, creating a total of approximately
+> 240,000 examples of instructions, inputs, and outputs. Experiments show that
+> despite containing a fair amount of noise, training on Unnatural Instructions
+> rivals the effectiveness of training on open-source manually-curated datasets,
+> surpassing the performance of models such as T0++ and Tk-Instruct across
+> various benchmarks.
diff --git a/docs/research/search_based_qa.md b/docs/research/search_based_qa.md
index 855720e2..5d7fe520 100644
--- a/docs/research/search_based_qa.md
+++ b/docs/research/search_based_qa.md
@@ -1,6 +1,7 @@
 # Cohere Grounded QA
 
-[Cohere AI created a question-answering chatbot](https://github.com/cohere-ai/sandbox-grounded-qa) that can
+[Cohere AI created a question-answering chatbot](https://github.com/cohere-ai/sandbox-grounded-qa)
+that can
 
 1. Understand questions in the context of a conversation
 2. Search the internet for related information
@@ -9,43 +10,56 @@
 
 ## Cohere API
 
-[Cohere's generate function](https://docs.cohere.ai/reference/generate): Continues a text prompt using either the `medium` or `xlarge` model.
+[Cohere's generate function](https://docs.cohere.ai/reference/generate):
+Continues a text prompt using either the `medium` or `xlarge` model.
 
-[Cohere's embed function](https://docs.cohere.ai/reference/embed): Embedgs a list of strings using either the `small` or `large` model. Alternatively, you can specify the ID of a custom model and use that instead.
+[Cohere's embed function](https://docs.cohere.ai/reference/embed): Embeds a
+list of strings using either the `small` or `large` model. Alternatively, you
+can specify the ID of a custom model and use that instead.
 
 ## Grounded QA System
 
 Cohere's Grounded QA system makes 4 calls to the Cohere API:
 
-1. Get contextualized question as a query to Google ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
+1. Get contextualized question as a query to Google
+   ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
 
    - Input: Chat History
    - Output: Contextualized Question
    - API Call: `cohere.generate`
    - Model: `xlarge`
-   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_contextual_search_query.prompt): Nine few-shot examples of (Chat History, Contextualized Question) pairs followed by the current chat history and the prompt "question: "
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_contextual_search_query.prompt):
+     Nine few-shot examples of (Chat History, Contextualized Question) pairs
+     followed by the current chat history and the prompt "question: "
 
-2. Generate sample answer to compare with search results ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
+2. Generate sample answer to compare with search results
+   ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/model.py))
 
    - Input: Contextualized Question
    - Output: Sample Answer
    - API Call: `cohere.generate`
    - Model: `xlarge`
-   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_sample_answer.prompt): Some task instructions followed by 12 few-shot examples of (Contextualized Question, Sample Answer) pairs followed by the current contextualized question and the prompt "answer: "
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/prompt_data/get_sample_answer.prompt):
+     Some task instructions followed by 12 few-shot examples of (Contextualized
+     Question, Sample Answer) pairs followed by the current contextualized
+     question and the prompt "answer: "
 
-3. Get embeddings to rank search results by cosine similarity to sample answer ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/search.py))
+3. Get embeddings to rank search results by cosine similarity to sample answer
+   ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/search.py))
 
    - Input: Sample Answer, Search Results
    - Output: Embeddings of sample answer and all search result documents
    - API Call: `cohere.embed`
    - Model: `multilingual-22-12`
 
-4. Condition on the top 2 most similar search results and answer the question ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/answer.py))
+4. Condition on the top 2 most similar search results and answer the question
+   ([code](https://github.com/cohere-ai/sandbox-grounded-qa/blob/main/qa/answer.py))
    - Input: Top 2 Search Results, Contextualized Question
    - Output: Answer
    - API Call: `cohere.generate`
    - Model: `xlarge`
-   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/43f3e9710112dcc8c92652ac1326ed9330823ddf/qa/answer.py#L25): Task instructions followed by the context and question.
+   - [Prompt](https://github.com/cohere-ai/sandbox-grounded-qa/blob/43f3e9710112dcc8c92652ac1326ed9330823ddf/qa/answer.py#L25):
+     Task instructions followed by the context and question.
 
 ## Models
 
@@ -53,15 +67,18 @@ Cohere's model documentation is pretty sparse
 
 ### [xlarge](https://docs.cohere.ai/docs/generation-card#model-description)
 
-- Training Data: [`coheretext-filtered` dataset](https://docs.cohere.ai/docs/data-statement)
-  - 200GB of filtered text (3TB unfiltered) from the Google Books dataset, CommonCrawl, and text scraped by Cohere
+- Training Data:
+  [`coheretext-filtered` dataset](https://docs.cohere.ai/docs/data-statement)
+  - 200GB of filtered text (3TB unfiltered) from the Google Books dataset,
+    CommonCrawl, and text scraped by Cohere
   - English documents only
   - Filtered "harmful, biased, or otherwise undesirable documents"
 - Model architecture: Generative Pretrained Transformer
 - Model Performance:
   - Hellaswag Accuracy, Zero-Shot: 0.805
   - PIQA Likelihood, Zero-Shot: 0.824
-  - Cohere also reported [safety benchmarks](https://docs.cohere.ai/docs/generation-card#safety-benchmarks)
+  - Cohere also reported
+    [safety benchmarks](https://docs.cohere.ai/docs/generation-card#safety-benchmarks)
 
 ### [multilingual-22-12](https://docs.cohere.ai/docs/multilingual-language-models)
 
@@ -71,22 +88,36 @@ Cohere's model documentation is pretty sparse
   - Search-English: 55.8
   - Search-Multilingual: 51.4
   - Cross-lingual Classification: 64.6
-  - Cohere's multilingual model outperformed: Sentence-transformers: `paraphrase-multilingual-mpnet-base-v2`, Google: `LaBSE`, Google: `Universal Sentence Encoder` in all the above categories according to Cohere.
+  - Cohere's multilingual model outperformed: Sentence-transformers:
+    `paraphrase-multilingual-mpnet-base-v2`, Google: `LaBSE`, Google:
+    `Universal Sentence Encoder` in all the above categories according to
+    Cohere.
 
 ## OpenAssistant for Grounded QA
 
-OpenAssistant may fulfill a similar role as the `xlarge` Cohere model in the grounded QA system if it can:
+OpenAssistant may fulfill a similar role as the `xlarge` Cohere model in the
+grounded QA system if it can:
 
 1. Generate a contextualized question from a chat history
 2. Generate a sample answer to compare with search results
 3. Generate an answer conditioned on the top 2 most similar search results
 
-Perhaps these tasks could be work packages and get assigned to human annotators to create examples of the input and output for each task.
+Perhaps these tasks could be work packages and get assigned to human annotators
+to create examples of the input and output for each task.
 
-OpenAssistant must also be able to identify when it is appropriate to search the internet. The Cohere system assumes every message from the user is a question and searches the internet for an answer. OpenAssistant would also need a way to indicate to an internal system that it "wants" to search the internet.
+OpenAssistant must also be able to identify when it is appropriate to search the
+internet. The Cohere system assumes every message from the user is a question
+and searches the internet for an answer. OpenAssistant would also need a way to
+indicate to an internal system that it "wants" to search the internet.
 
-Perhaps OpenAssistant could prefix every message it sends with a recipient ID. If it wishes to send a command to an internal system, if could prefix the message with something like CMD: whereas if it wants to communicate with the user, it could prefix its message with USR:
+Perhaps OpenAssistant could prefix every message it sends with a recipient ID.
+If it wishes to send a command to an internal system, it could prefix the
+message with something like CMD: whereas if it wants to communicate with the
+user, it could prefix its message with USR:
 
-This system may allow for flexible communication between OpenAssistant and one or more conversational systems.
+This system may allow for flexible communication between OpenAssistant and one
+or more conversational systems.
 
-Examples of this prefix system would need to be taught to OpenAssistant through training data that contains such syntax. Perhaps such examples could be generated through the work packages system.
+Examples of this prefix system would need to be taught to OpenAssistant through
+training data that contains such syntax. Perhaps such examples could be
+generated through the work packages system.
diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 73a872a0..655d6469 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -18,7 +18,8 @@ Start training reward model
 python trainer.py configs/electra-base-dis-webgpt.yml
 ```
 
-Additional axis labeling, this outputs a 4 summary quality evaluation metrics (score are normalized to 0-1 )
+Additional axis labeling, this outputs 4 summary quality evaluation metrics
+(scores are normalized to 0-1)
 
 ```bash
 python summary_quality_trainer.py configs/test-bloomz-560m-quality.yml
@@ -36,7 +37,8 @@ The four summary are :
 
 ## Dataset
 
-For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here.
+For now we only support the webgpt and summary datasets from OpenAI. Once
+open-assistant datasets are available they will be added here.
 
 ## Model
 
diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index ed33b3c0..c0745fa9 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -4,16 +4,21 @@ Some other reward features we can use
 
 1. Summaries from human feedback
 
-- use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
+- use `confidence` score into the RM learning, ensure the output rank score
+  correlates with confidence
 
-- each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
+- each labeling has a labeling `note`, basically comments by labeler, not sure
+  what else we can use
 
-- ~~Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model)~~
+- ~~Use the score for "overall", "accuracy", "coverage", "coherence" from
+  axis/evals to train an addition model (rank additional aspect of the policy
+  model)~~
 
   - this should be placed under experimental_dataset.py
 
 2. Add support for anthropic dataset
 
-- anthropic dataset is more like a conversation tree which is much complex than simply question-answer schema
+- anthropic dataset is more like a conversation tree, which is much more complex
+  than a simple question-answer schema
 
   - this is basically a MCTS from alphazero.
diff --git a/notebooks/README.md b/notebooks/README.md
index f975aeef..edb5da33 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -1,7 +1,10 @@
 # Notebooks
 
-This is a folders with some useful notebooks, all the notebooks have a markdown file with the same name explaining what they do.
+This is a folder with some useful notebooks, all the notebooks have a markdown
+file with the same name explaining what they do.
 
 ## Contributing
 
-Contributing to both notebooks and making new notebooks is very welcome. If you do so, make sure to make a markdown (.md) file to go with your notebook, makes it easier for people to know what your notebook is about.
+Contributing to both notebooks and making new notebooks is very welcome. If you
+do so, make sure to make a markdown (.md) file to go with your notebook, makes
+it easier for people to know what your notebook is about.
diff --git a/notebooks/data-argumentation/EssayInstructions.md b/notebooks/data-argumentation/EssayInstructions.md
index 7984d1a6..210263d4 100644
--- a/notebooks/data-argumentation/EssayInstructions.md
+++ b/notebooks/data-argumentation/EssayInstructions.md
@@ -1,10 +1,11 @@
 # Essay Instructions
 
-Essay Instructions is a notebook that takes an essay as an input and genrates instructions on how to generate
-that essay. This will be very useful for data collecting for the model
+Essay Instructions is a notebook that takes an essay as an input and generates
+instructions on how to generate that essay. This will be very useful for data
+collecting for the model
 
 ## Contributing
 
-Feel free to contribute to this notebook, it's nowhere near perfect but it's a good start.
-If you want to contribute fidning a new model that better suits this task would be great.
-Hugginface has a lot of models that could help.
+Feel free to contribute to this notebook, it's nowhere near perfect but it's a
+good start. If you want to contribute, finding a new model that better suits this
+task would be great. Hugging Face has a lot of models that could help.
diff --git a/notebooks/data-argumentation/EssayRevision.md b/notebooks/data-argumentation/EssayRevision.md
index 69fffd82..18b76a34 100644
--- a/notebooks/data-argumentation/EssayRevision.md
+++ b/notebooks/data-argumentation/EssayRevision.md
@@ -1,8 +1,11 @@
 # Essay Revision
 
-Essay Revision is a notebook that generates data for improving essays. It does that by taking a "good" essay, making it worse step by step
-and the fidning instructions for making it better. This will be useful for generating data for the model.
+Essay Revision is a notebook that generates data for improving essays. It does
+that by taking a "good" essay, making it worse step by step and then finding
+instructions for making it better. This will be useful for generating data for
+the model.
 
 ## Contributing
 
-Feel free to contribute to this notebook. It's not perfect but it is quite good. Finding a better way to make gramatical errors may be a good place to start.
+Feel free to contribute to this notebook. It's not perfect but it is quite good.
+Finding a better way to make grammatical errors may be a good place to start.
diff --git a/notebooks/detoxify-evaluation/README.md b/notebooks/detoxify-evaluation/README.md
index c56c2600..163f2f79 100644
--- a/notebooks/detoxify-evaluation/README.md
+++ b/notebooks/detoxify-evaluation/README.md
@@ -1,10 +1,12 @@
 # Detoxify evaluation
 
-[Detoxify](https://github.com/unitaryai/detoxify) is a open source model used to identify prompts as toxic
+[Detoxify](https://github.com/unitaryai/detoxify) is an open source model used to
+identify prompts as toxic
 
 <img  src="https://raw.githubusercontent.com/unitaryai/detoxify/master/examples.png"  alt="Image from detoxify github that shows the example input/output of their model"  />
 
-It contains 3 different models that vary in transformer type and data it was trained on
+It contains 3 different models that vary in transformer type and data it was
+trained on
 
 |  Model name  | Transformer type  |                 Data from                  |
 | :----------: | :---------------: | :----------------------------------------: |
@@ -12,19 +14,23 @@ It contains 3 different models that vary in transformer type and data it was tra
 |   unbiased   |   roberta-base    | Unintended Bias in Toxicity Classification |
 | multilingual | xlm-roberta-base  | Multilingual Toxic Comment Classification  |
 
-Unbiased and original models also have a 'small' version - but since normal models are not memory heavy, and small models perform noticably worse, they are only described in the notebook
+Unbiased and original models also have a 'small' version - but since normal
+models are not memory heavy, and small models perform noticeably worse, they are
+only described in the notebook
 
 ## All tests below were ran on a 3090TI
 
 # Inference and training times and memory usages
 
-Charts showing detailed memory usages and times for different sentence lengths and batch sizes are inside the notebook
-Quick overview batch size 16, sentence length 4k for training, batch size 128 sentence length 4k for inference
-| Model name | Training memory| Training speed | Inference Memory| Inference Speed|
-| :---: | :---: | :---: |:---: | :---: |
-|original| 11.8GB | 2.40s| 4.8GB|16.48s|
-|unbiased| 12GB| 1.09s| 4.8GB | 5.59s|
-|multilingual|14GB| 1.00s| 5.5GB| 4.89s|
+Charts showing detailed memory usages and times for different sentence lengths
+and batch sizes are inside the notebook. Quick overview: batch size 16, sentence
+length 4k for training; batch size 128, sentence length 4k for inference.
+
+|  Model name  | Training memory | Training speed | Inference Memory | Inference Speed |
+| :----------: | :-------------: | :------------: | :--------------: | :-------------: |
+|   original   |     11.8GB      |     2.40s      |      4.8GB       |     16.48s      |
+|   unbiased   |      12GB       |     1.09s      |      4.8GB       |      5.59s      |
+| multilingual |      14GB       |     1.00s      |      5.5GB       |      4.89s      |
 
 # Filtering quality
 
@@ -45,9 +48,13 @@ Detoxify was tested on 4 different types of inputs
 
 Subjectivly 'unbiased' looks like the best performing model.
 
-I don't think it would do well as a security layer in a live version of open assistant unless we do some finetuning first, because it can be fooled to pass toxicity if it's presented in formal language.
+I don't think it would do well as a security layer in a live version of open
+assistant unless we do some finetuning first, because it can be fooled to pass
+toxicity if it's presented in formal language.
 
-With some caution it can be used to filter prompts but I would suggest also using someone for verification of messages that are marked as toxic but still below 90% confidence
+With some caution it can be used to filter prompts but I would suggest also
+using someone for verification of messages that are marked as toxic but still
+below 90% confidence
 
 # Licensing
 
@@ -85,7 +92,8 @@ This is obviously not legal advice.
 
 # Hosting
 
-The model is currently available on [huggingface](https://huggingface.co/unitary) and torch hub
+The model is currently available on
+[huggingface](https://huggingface.co/unitary) and torch hub
 
 ```
 torch.hub.load('unitaryai/detoxify',model)
diff --git a/scripts/backend-development/README.md b/scripts/backend-development/README.md
index ef2ac0bf..d5b3ccc5 100644
--- a/scripts/backend-development/README.md
+++ b/scripts/backend-development/README.md
@@ -1,6 +1,12 @@
 # Backend Development Setup
 
-In root directory, run `docker compose up backend-dev --build --attach-dependencies` to start a database. The default settings are already configured to connect to the database at `localhost:5432`.
+In root directory, run
+`docker compose up backend-dev --build --attach-dependencies` to start a
+database. The default settings are already configured to connect to the database
+at `localhost:5432`.
 
-Make sure you have all requirements installed. You can do this by running `pip install -r requirements.txt` inside the `backend` folder and `pip install -e .` inside the `oasst-shared` folder.
-Then, run the backend using the `run-local.sh` script. This will start the backend server at `http://localhost:8080`.
+Make sure you have all requirements installed. You can do this by running
+`pip install -r requirements.txt` inside the `backend` folder and
+`pip install -e .` inside the `oasst-shared` folder. Then, run the backend using
+the `run-local.sh` script. This will start the backend server at
+`http://localhost:8080`.
diff --git a/scripts/frontend-development/README.md b/scripts/frontend-development/README.md
index 05349fb9..3ac2a258 100644
--- a/scripts/frontend-development/README.md
+++ b/scripts/frontend-development/README.md
@@ -1,5 +1,8 @@
 # Frontend Development Setup
 
-In root directory run `docker compose up frontend-dev --build --attach-dependencies` to start a database and the backend server.
+In root directory run
+`docker compose up frontend-dev --build --attach-dependencies` to start a
+database and the backend server.
 
-Then, point your frontend at `http://localhost:8080` to start developing. During development, any API key will be accepted.
+Then, point your frontend at `http://localhost:8080` to start developing. During
+development, any API key will be accepted.
diff --git a/website/README.md b/website/README.md
index f70dcfce..5198a820 100644
--- a/website/README.md
+++ b/website/README.md
@@ -26,8 +26,8 @@ This website is built using:
     development.
 1.  [Prisma](https://www.prisma.io/): An ORM to interact with a web specific
     [Postgres](https://www.postgresql.org/) database.
-1.  [NextAuth.js](https://next-auth.js.org/): A user authentication framework
-    to ensure we handle accounts with best practices.
+1.  [NextAuth.js](https://next-auth.js.org/): A user authentication framework to
+    ensure we handle accounts with best practices.
 1.  [TailwindCSS](https://tailwindcss.com/): A general purpose framework for
     styling any component.
 1.  [Chakra-UI](https://chakra-ui.com/): A wide collection of pre-built UI
@@ -38,10 +38,10 @@ This website is built using:
 To contribute to the website, make sure you have the following setup and
 installed:
 
-1.  [NVM](https://github.com/nvm-sh/nvm): The Node Version Manager makes it
-    easy to ensure you have the right NodeJS version installed. Once installed,
-    run `nvm use 16` to use Node 16.x. The website is known to be stable with
-    NodeJS version 16.x. This will install both Node and NPM.
+1.  [NVM](https://github.com/nvm-sh/nvm): The Node Version Manager makes it easy
+    to ensure you have the right NodeJS version installed. Once installed, run
+    `nvm use 16` to use Node 16.x. The website is known to be stable with NodeJS
+    version 16.x. This will install both Node and NPM.
 1.  [Docker](https://www.docker.com/): We use docker to simplify running
     dependent services.
 
@@ -50,8 +50,8 @@ installed:
 If you're doing active development we suggest the following workflow:
 
 1.  In one tab, navigate to the project root.
-1.  Run `docker compose up frontend-dev --build --attach-dependencies`. You can optionally include `-d` to detach and
-    later track the logs if desired.
+1.  Run `docker compose up frontend-dev --build --attach-dependencies`. You can
+    optionally include `-d` to detach and later track the logs if desired.
 1.  In another tab navigate to `${OPEN_ASSISTANT_ROOT/website`.
 1.  Run `npm install`
 1.  Run `npx prisma db push` (This is also needed when you restart the docker
@@ -64,17 +64,25 @@ If you're doing active development we suggest the following workflow:
 
 ### Using debug user credentials
 
-You can use the debug credentials provider to log in without fancy emails or OAuth.
+You can use the debug credentials provider to log in without fancy emails or
+OAuth.
 
-1. This feature is automatically on in development mode, i.e. when you run `npm run dev`. In case you want to do the same with a production build (for example, the docker image), then run the website with environment variable `DEBUG_LOGIN=true`.
+1. This feature is automatically on in development mode, i.e. when you run
+   `npm run dev`. In case you want to do the same with a production build (for
+   example, the docker image), then run the website with environment variable
+   `DEBUG_LOGIN=true`.
 1. Use the `Login` button in the top right to go to the login page.
-1. You should see a section for debug credentials. Enter any username you wish, you will be logged in as that user.
+1. You should see a section for debug credentials. Enter any username you wish,
+   you will be logged in as that user.
 
 ### Using Storybook
 
-To develop components using [Storybook](https://storybook.js.org/) run `npm run storybook`. Then navigate to in your browser to `http://localhost:6006`.
+To develop components using [Storybook](https://storybook.js.org/) run
+`npm run storybook`. Then navigate in your browser to
+`http://localhost:6006`.
 
-To create a new story create a file named `[componentName].stories.js`. An example how such a story could look like, see `Header.stories.jsx`.
+To create a new story create a file named `[componentName].stories.js`. For an
+example of what such a story could look like, see `Header.stories.jsx`.
 
 ## Code Layout
 
@@ -82,11 +90,12 @@ To create a new story create a file named `[componentName].stories.js`. An examp
 
 All react code is under `src/` with a few sub directories:
 
-1.  `pages/`: All pages a user could navigate too and API URLs which are under `pages/api/`.
-1.  `components/`: All re-usable React components. If something gets used
-    twice we should create a component and put it here.
-1.  `lib/`: A generic place to store library files that are used anywhere.
-    This doesn't have much structure yet.
+1.  `pages/`: All pages a user could navigate to and API URLs which are under
+    `pages/api/`.
+1.  `components/`: All re-usable React components. If something gets used twice
+    we should create a component and put it here.
+1.  `lib/`: A generic place to store library files that are used anywhere. This
+    doesn't have much structure yet.
 
 NOTE: `styles/` can be ignored for now.
 
@@ -104,16 +113,27 @@ We're not really using CSS styles. `styles/` can be ignored.
 
 ## Testing the UI
 
-Cypress is used for end-to-end (e2e) and component testing and is configured in `./cypress.config.ts`. The `./cypress` folder is used for supporting configuration files etc.
+Cypress is used for end-to-end (e2e) and component testing and is configured in
+`./cypress.config.ts`. The `./cypress` folder is used for supporting
+configuration files etc.
 
 - Store e2e tests in the `./cypress/e2e` folder.
-- Store component tests adjacent to the component being tested. If you want to wriite a test for `./src/components/Layout.tsx` then store the test file at `./src/components/Layout.cy.tsx`.
+- Store component tests adjacent to the component being tested. If you want to
+  write a test for `./src/components/Layout.tsx` then store the test file at
+  `./src/components/Layout.cy.tsx`.
 
 A few npm scripts are available for convenience:
 
-- `npm run cypress`: Useful for development, it opens Cypress and allows you to explore, run and debug tests. It assumes you have the NextJS site running at `localhost:3000`.
-- `npm run cypress:run`: Runs all tests. Useful for a quick sanity check before sending a PR or to run in CI pipelines.
-- `npm run cypress:image-baseline`: If you have tests failing because of visual changes that was expected, this command will update the baseline images stored in `./cypress-visual-screenshots/baseline` with those from the adjacent comparison folder. More can be found in the [docs of `uktrade/cypress-image-diff`](https://github.com/uktrade/cypress-image-diff/blob/main/docs/CLI.md#update-all-baseline-images-for-failing-tests).
+- `npm run cypress`: Useful for development, it opens Cypress and allows you to
+  explore, run and debug tests. It assumes you have the NextJS site running at
+  `localhost:3000`.
+- `npm run cypress:run`: Runs all tests. Useful for a quick sanity check before
+  sending a PR or to run in CI pipelines.
+- `npm run cypress:image-baseline`: If you have tests failing because of visual
+  changes that were expected, this command will update the baseline images stored
+  in `./cypress-visual-screenshots/baseline` with those from the adjacent
+  comparison folder. More can be found in the
+  [docs of `uktrade/cypress-image-diff`](https://github.com/uktrade/cypress-image-diff/blob/main/docs/CLI.md#update-all-baseline-images-for-failing-tests).
 
 Read more in the [./cypress README](cypress/).
 
@@ -125,9 +145,9 @@ When writing code for the website, we have a few best practices:
     dependencies. Order them alphabetically according to the package name.
 1.  When trying to implement something new, check if
     [Chakra-UI](https://chakra-ui.com/) has components that are close enough to
-    your need. For example Sliders, Radio Buttons, Progress indicators, etc. They
-    have a lot and we can save time by re-using what they have and tweaking the
-    style as needed.
+    your need. For example Sliders, Radio Buttons, Progress indicators, etc.
+    They have a lot and we can save time by re-using what they have and tweaking
+    the style as needed.
 1.  Format everything with [Prettier](https://prettier.io/). This is done by
     default with pre-submits. We currently don't have any custom settings.
 1.  Define functional React components (with types for all properties when
@@ -135,14 +155,15 @@ When writing code for the website, we have a few best practices:
 
 ### URL Paths
 
-To use stable and consistent URL paths, we recommend the following strategy for new tasks:
+To use stable and consistent URL paths, we recommend the following strategy for
+new tasks:
 
 1.  For any task that involves writing a free-form response, put the page under
     `website/src/pages/create` with a page name matching the task type, such as
     `summarize_story.tsx`.
 1.  For any task that evaluates, rates, or ranks content, put the page under
-    `website/src/pages/evaluate` with a page name matching the task type such
-    as `rate_summary.tsx`.
+    `website/src/pages/evaluate` with a page name matching the task type such as
+    `rate_summary.tsx`.
 
 With this we'll be able to ensure these contribution pages are hidden from
 logged out users but accessible to logged in users.
@@ -151,5 +172,6 @@ logged out users but accessible to logged in users.
 
 To learn more about Next.js, take a look at the following resources:
 
-- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
+- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js
+  features and API.
 - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
diff --git a/website/cypress/README.md b/website/cypress/README.md
index 12a32378..7f2c5d53 100644
--- a/website/cypress/README.md
+++ b/website/cypress/README.md
@@ -1,14 +1,24 @@
 # Component and e2e testing with Cypress
 
-[Cypress](https://www.cypress.io/) is used for both component- and end-to-end testing. Below there's a few examples for the context of this site. To learn more, the [Cypress documentation](https://docs.cypress.io/guides/getting-started/opening-the-app) has it all.
+[Cypress](https://www.cypress.io/) is used for both component- and end-to-end
+testing. Below there's a few examples for the context of this site. To learn
+more, the
+[Cypress documentation](https://docs.cypress.io/guides/getting-started/opening-the-app)
+has it all.
 
-Don't get scared by the commercial offerings they offer. Their core is open source, the cloud offering is not necesarry at all and can be replaced by CI tooling and [community efforts](https://sorry-cypress.dev/).
+Don't get scared by the commercial offerings they offer. Their core is open
+source, the cloud offering is not necessary at all and can be replaced by CI
+tooling and [community efforts](https://sorry-cypress.dev/).
 
 # Component testing
 
-To write a new component test, you either create a new `.tsx` adjacent to the component you want to test or you can use the guide presented yo you when running `npm run cypress` which allows you to easily create the skeleton test for an existing component.
+To write a new component test, you either create a new `.tsx` adjacent to the
+component you want to test or you can use the guide presented to you when
+running `npm run cypress` which allows you to easily create the skeleton test
+for an existing component.
 
-If you have a `Button.tsx` component, create a file next to it called `Button.cy.tsx` which could look like this:
+If you have a `Button.tsx` component, create a file next to it called
+`Button.cy.tsx` which could look like this:
 
 ```typescript
 import React from "react";
@@ -25,17 +35,28 @@ describe("<Button />", () => {
 
 ## What's happening here?
 
-First we use `cy.mount` to mount our component under test. Notive how we specify `className` and inner text - this is where we arrange our component with fake data that we could assert on later.
+First we use `cy.mount` to mount our component under test. Notice how we specify
+`className` and inner text - this is where we arrange our component with fake
+data that we could assert on later.
 
-In the example above, we also use `cy.get` to select the rendered `button` element. Cypress has multiple ways to [select elements](https://docs.cypress.io/guides/references/best-practices), `get` is just one of them (and often not recommended).
+In the example above, we also use `cy.get` to select the rendered `button`
+element. Cypress has multiple ways to
+[select elements](https://docs.cypress.io/guides/references/best-practices),
+`get` is just one of them (and often not recommended).
 
-At last, we use `captureSnapshot` which is a plugin that snaps a photo of the `button` element and compares it to a baseline located in the `./cypress-visual-screenshots/baseline/` folder. If there's too many unidentical pixels between the two, it will fail the test.
+At last, we use `captureSnapshot` which is a plugin that snaps a photo of the
+`button` element and compares it to a baseline located in the
+`./cypress-visual-screenshots/baseline/` folder. If there are too many differing
+pixels between the two, it will fail the test.
 
 # End-to-end (e2e) testing
 
-e2e tests are stored in the `./cypress/e2e` folder and should be named `{page}.cy.ts` and located in a relative folder structure that mirrors the page under test.
+e2e tests are stored in the `./cypress/e2e` folder and should be named
+`{page}.cy.ts` and located in a relative folder structure that mirrors the page
+under test.
 
-When running `npm run cypress` and selecting e2e testing, we assume you have the NextJS site running at `localhost:3000`.
+When running `npm run cypress` and selecting e2e testing, we assume you have the
+NextJS site running at `localhost:3000`.
 
 An example test from this time of writing, could look as follows:
 
@@ -53,10 +74,18 @@ export {};
 
 ## What's happening here?
 
-First we use [`cy.visit`](https://docs.cypress.io/api/commands/visit) to point the browser at the desired page. It appends relative paths to the configured `baseUrl` (found in `./cypress.config.ts`).
+First we use [`cy.visit`](https://docs.cypress.io/api/commands/visit) to point
+the browser at the desired page. It appends relative paths to the configured
+`baseUrl` (found in `./cypress.config.ts`).
 
-Cypress will [automatically await](https://docs.cypress.io/guides/core-concepts/introduction-to-cypress#Timeouts) almost anything you do, but fail if the default timeout is reached.
+Cypress will
+[automatically await](https://docs.cypress.io/guides/core-concepts/introduction-to-cypress#Timeouts)
+almost anything you do, but fail if the default timeout is reached.
 
-Then we get the email input field and type our email address. Notice the `{enter}` keyword, this will cause Cypress to hit the return key which we expect to submit the form.
+Then we get the email input field and type our email address. Notice the
+`{enter}` keyword, this will cause Cypress to hit the return key which we expect
+to submit the form.
 
-We then assert that the URL should contain `/auth/verify`. Again the timeout will make sure we are not waiting forever, and the test will fail if we do not manage to get there in a reasonable time.
+We then assert that the URL should contain `/auth/verify`. Again the timeout
+will make sure we are not waiting forever, and the test will fail if we do not
+manage to get there in a reasonable time.