From ad98a282410664bf03cbbaf1b835b440a26d0409 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Fri, 30 Dec 2022 17:25:50 +0000 Subject: [PATCH 01/53] [feature] add rank dataset for webgpt and human feedback summary --- model/reward/instructor/README.md | 7 + model/reward/instructor/TODO.md | 12 ++ model/reward/instructor/cls_dataset.py | 73 +++++++++ .../reward/instructor/experimental_dataset.py | 11 ++ model/reward/instructor/rank_datasets.py | 145 ++++++++++++++++++ model/reward/instructor/tests/__init__.py | 0 model/reward/instructor/tests/test_dataset.py | 28 ++++ model/reward/instructor/trainer.py | 2 + model/reward/instructor/utils.py | 18 +++ model/utils.py | 4 + 10 files changed, 300 insertions(+) create mode 100644 model/reward/instructor/README.md create mode 100644 model/reward/instructor/TODO.md create mode 100644 model/reward/instructor/cls_dataset.py create mode 100644 model/reward/instructor/experimental_dataset.py create mode 100644 model/reward/instructor/rank_datasets.py create mode 100644 model/reward/instructor/tests/__init__.py create mode 100644 model/reward/instructor/tests/test_dataset.py create mode 100644 model/reward/instructor/trainer.py create mode 100644 model/reward/instructor/utils.py create mode 100644 model/utils.py diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md new file mode 100644 index 00000000..7dbfefbc --- /dev/null +++ b/model/reward/instructor/README.md @@ -0,0 +1,7 @@ + + + +```bash + + +``` \ No newline at end of file diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md new file mode 100644 index 00000000..33bc6595 --- /dev/null +++ b/model/reward/instructor/TODO.md @@ -0,0 +1,12 @@ + +Some other reward features we can use + + +Summaries from human feedback + +* use `confidence` score into the RM learning, ensure the output rank score correlates with confidence + +* each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use + + + diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py new file mode 100644 index 00000000..54bbd19e --- /dev/null +++ b/model/reward/instructor/cls_dataset.py @@ -0,0 +1,73 @@ +''' + + classification based ranking + +''' +import os +import json +import random +import torch +import numpy as np +from dataset import load_dataset +from torch.utils.data import Dataset +from .utils import webgpt_return_format + +class WebGPTDataset(Dataset): + def __init__(self, mode='train', index_cache='dataset/webgpt_train_idx.pt', additional_dataset=None) -> None: + super().__init__() + ''' + mode : train or val, used for validation purpose, has nothing to do with original split + additional_dataset : a list of jsonline format with idx, question and texts (generate candidates) + idx : must match the index you iterate from comparison enumerate order + question : for validation purpose + texts : list of K generate results from the question prompt + ''' + os.makedirs('dataset', exist_ok=True) + dataset = load_dataset("openai/webgpt_comparisons") + if os.path.exists(index_cache): + train_idx = torch.load(index_cache) + else: + train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False) + torch.save(set(train_idx.tolist()), index_cache) + self.dataset = [] + self.dataset_index = [] + for idx, row in enumerate(dataset['train']): + if mode == 'train' and idx in train_idx: + self.dataset.append(webgpt_return_format(row)) + self.dataset_index.append(idx) + elif idx not in train_idx and mode != 'train': + self.dataset.append(webgpt_return_format(row)) + self.dataset_index.append(idx) + + # since this dataset was generated from 176B GPT-3 + # we needed some more sample generated from the starting model + # since this model must rank model generated by GPT-3 being better than your starting model + self.sample_additional = False + if additional_dataset is not None: + self.sample_additional = True + self.additional = {} + with open(additional_dataset, 'r') as f: + for line in f: + row = json.loads(line) + if row['idx'] in self.dataset_index: + self.additional[row['idx']] = row['negatives'] + if len(self.additional) != len(self.dataset_index): + for match_idx in self.dataset_index: + if match_idx in self.additional: + continue + + idx = match_idx-900 + while idx not in self.additional: + idx -= 1 + self.additional[match_idx] = self.additional[idx] + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + row = self.dataset[index] + if not self.sample_additional: + return row['question'], row['pos'], row['neg'] + + gen_neg = random.choice(self.additional[self.dataset_index[index]]) + return row['question'], row['pos'], row['neg'], gen_neg diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py new file mode 100644 index 00000000..145588c4 --- /dev/null +++ b/model/reward/instructor/experimental_dataset.py @@ -0,0 +1,11 @@ +''' + + +''' +import os +import json +import random +import torch +import numpy as np +from dataset import load_dataset +from torch.utils.data import Dataset diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py new file mode 100644 index 00000000..7fef5ab7 --- /dev/null +++ b/model/reward/instructor/rank_datasets.py @@ -0,0 +1,145 @@ +''' + author: theblackcat102 + + A list of rank based dataset for training using rank loss + + Some nice features to have + + [ ] + +''' +import os +import glob +import json +import numpy as np +from torch.utils.data import Dataset +from datasets import load_dataset + +class CollateFN(): + def __init__(self, tokenizer, max_length=400) -> None: + self.tokenizer = tokenizer + self.max_length = max_length + + def __call__(self, batch): + prompts = [] + pos_sentences = [] + neg_sentences = [] + for prompt, pairs in batch: + for (pos, neg) in pairs: + prompts.append(prompt) + pos_sentences.append(pos) + neg_sentences.append(neg) + + batch = [self.tokenizer(prompts, pos_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True),\ + self.tokenizer(prompts, neg_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True)] + return batch + +class WebGPT(Dataset): + + def __init__(self) -> None: + super().__init__() + + dataset = load_dataset("openai/webgpt_comparisons") + questions = {} + # using prompt as our index will allows us + # to add additional generated prompt later + self.index2question = {} + for row in dataset['train']: + question = row['question']['full_text'] + if question not in self.index2question: + self.index2question[len(self.index2question)] = question + + if question not in questions: + questions[question] = [] + + if row['score_0'] > row['score_1']: + # not going to risk it + questions[question].append(( + row['answer_0'], row['answer_1'] + )) + else: + questions[question].append(( + row['answer_1'], row['answer_0'] + )) + + self.questions = questions + + def __len__(self): + return len(self.index2question) + + def __getitem__(self, index): + question = self.index2question[index] + rows = self.questions[question] + # optimize the format later + return question, rows + + + + +class HFSummary(Dataset): + ''' + Human feedback data from OpenAI + https://github.com/openai/summarize-from-feedback + + >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive + + choice : 0 or 1 + + ''' + def __init__(self, split='train', + path='summarize-from-feedback/comparisons/*.json', + conf_threshold=-1, + max_comparison_per_sample=5) -> None: + super().__init__() + assert split in ('train', 'valid1', 'valid2', 'test') + summaries = {} + # using prompt as our index will allows us + # to add additional generated prompt later + self.index2summary = {} + self.max_comparison_per_sample = max_comparison_per_sample + for jsonl_file in glob.glob(path): + with open(jsonl_file, 'r') as f: + for line in f: + data = json.loads(line) + if data['split'] != split: + continue + if 'extra' in data and \ + 'confidence' in data['extra'] and \ + conf_threshold > data['extra']['confidence']: + print('skipping {}'.format(data['info']['id'])) + continue + + if 'article' in data['info']: + context = data['info']['article'] + elif 'post' in data['info']: + context = data['info']['post'] + + if context not in self.index2summary: + self.index2summary[len(self.index2summary)] = context + + if context not in summaries: + summaries[context] = [] + + pos, neg = (0, 1) if data['choice'] == 0 else (1, 0) + summaries[context].append(( + data['summaries'][pos]['text'], + data['summaries'][neg]['text'] + )) + + self.summaries = summaries + + def __len__(self): + return len(self.index2summary) + + def __getitem__(self, index): + context = self.index2summary[index] + # return pairs of comparison + rows = self.summaries[context] + # pair very big + # we are going to do some sampling + # not optimal but good for now + valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample) + # optimize the format later + return context, [ r for idx, r in enumerate(rows) if idx in valid_idx ] + + diff --git a/model/reward/instructor/tests/__init__.py b/model/reward/instructor/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py new file mode 100644 index 00000000..4dd59c16 --- /dev/null +++ b/model/reward/instructor/tests/test_dataset.py @@ -0,0 +1,28 @@ +from transformers import AutoTokenizer +from torch.utils.data import DataLoader +from rank_datasets import WebGPT, HFSummary, CollateFN + + +def test_hfsummary(): + + tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large") + collate_fn = CollateFN(tokenizer) + dataset = HFSummary() + dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8) + for batch in dataloader: + print(batch[0]['input_ids'].shape) + + +def test_webgpt(): + + tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large") + collate_fn = CollateFN(tokenizer) + dataset = WebGPT() + dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32) + for batch in dataloader: + print(batch[0]['input_ids'].shape) + + +if __name__ == "__main__": + test_hfsummary() + # test_webgpt() \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py new file mode 100644 index 00000000..9ee5e043 --- /dev/null +++ b/model/reward/instructor/trainer.py @@ -0,0 +1,2 @@ +import wandb +from accelerate import Accelerator diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py new file mode 100644 index 00000000..1487947c --- /dev/null +++ b/model/reward/instructor/utils.py @@ -0,0 +1,18 @@ +import re + +re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]') + +def webgpt_return_format(row): + if row['score_0'] >= row['score_1']: + # remove this to prevent information leak, since we are not using reference + return { + 'question': row['question']['full_text'], + 'pos': re_reference_remove.sub('', row['answer_0']), + 'neg': re_reference_remove.sub('', row['answer_1']) + } + + return { + 'question': row['question']['full_text'], + 'pos': re_reference_remove.sub('', row['answer_1']), + 'neg': re_reference_remove.sub('', row['answer_0']) + } diff --git a/model/utils.py b/model/utils.py new file mode 100644 index 00000000..579b3f6e --- /dev/null +++ b/model/utils.py @@ -0,0 +1,4 @@ +from transformers import AutoTokenizer + + +def update_galactica_tokenizer(): \ No newline at end of file From bcd5c52b3b370a217042b2ccb1983e113ecf6193 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 03:02:10 +0000 Subject: [PATCH 02/53] [feature] working trainer code --- .vscode/settings.json | 2 +- .../reward/instructor/experimental_dataset.py | 10 +- model/reward/instructor/rank_datasets.py | 49 ++++++--- model/reward/instructor/tests/test_dataset.py | 10 +- model/reward/instructor/trainer.py | 104 +++++++++++++++++- model/reward/instructor/utils.py | 23 ++++ 6 files changed, 174 insertions(+), 24 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 56a51f78..4c58a32f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,4 @@ { - "python.formatting.provider": "black", + "python.formatting.provider": "autopep8", "python.analysis.extraPaths": ["${workspaceFolder}/oasst-shared"] } diff --git a/model/reward/instructor/experimental_dataset.py b/model/reward/instructor/experimental_dataset.py index 145588c4..f705ccf6 100644 --- a/model/reward/instructor/experimental_dataset.py +++ b/model/reward/instructor/experimental_dataset.py @@ -1,5 +1,11 @@ ''' - + HFSummary + + I want to train a multi regression model on axis_evals dataset mainly we can estimate the score of these score + + - {"overall": "6", "accuracy": "6", "coverage": "6", "coherence": "7"} + + Should be better than just a preference score ''' import os @@ -9,3 +15,5 @@ import torch import numpy as np from dataset import load_dataset from torch.utils.data import Dataset + + diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 7fef5ab7..e407b30f 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -8,32 +8,51 @@ [ ] ''' +from typing import Optional, Union import os import glob import json +from dataclasses import dataclass import numpy as np from torch.utils.data import Dataset +import torch from datasets import load_dataset +from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy -class CollateFN(): - def __init__(self, tokenizer, max_length=400) -> None: - self.tokenizer = tokenizer - self.max_length = max_length +@dataclass +class DataCollatorForPairRank: + """ - def __call__(self, batch): - prompts = [] - pos_sentences = [] - neg_sentences = [] - for prompt, pairs in batch: + Data collator that will dynamically pad the inputs for multiple choice received. + + """ + tokenizer: PreTrainedTokenizerBase + num_choices: int = 2 + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + + def __call__(self, features): + + flatten_features = [] + batch_size = 0 + for question, pairs in features: for (pos, neg) in pairs: - prompts.append(prompt) - pos_sentences.append(pos) - neg_sentences.append(neg) - - batch = [self.tokenizer(prompts, pos_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True),\ - self.tokenizer(prompts, neg_sentences, return_tensors='pt', max_length=self.max_length, padding=True, truncation=True)] + flatten_features.append(self.tokenizer(question, pos, truncation=True)) + flatten_features.append(self.tokenizer(question, neg, truncation=True)) + batch_size += 1 + + batch = self.tokenizer.pad( + flatten_features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + return_tensors="pt", + ) + # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()} return batch + class WebGPT(Dataset): def __init__(self) -> None: diff --git a/model/reward/instructor/tests/test_dataset.py b/model/reward/instructor/tests/test_dataset.py index 4dd59c16..c452786b 100644 --- a/model/reward/instructor/tests/test_dataset.py +++ b/model/reward/instructor/tests/test_dataset.py @@ -1,26 +1,26 @@ from transformers import AutoTokenizer from torch.utils.data import DataLoader -from rank_datasets import WebGPT, HFSummary, CollateFN +from rank_datasets import WebGPT, HFSummary, DataCollatorForMultipleChoice def test_hfsummary(): tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large") - collate_fn = CollateFN(tokenizer) + collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200) dataset = HFSummary() dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=8) for batch in dataloader: - print(batch[0]['input_ids'].shape) + print(batch['input_ids'].shape) def test_webgpt(): tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large") - collate_fn = CollateFN(tokenizer) + collate_fn = DataCollatorForMultipleChoice(tokenizer, max_length=200) dataset = WebGPT() dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=32) for batch in dataloader: - print(batch[0]['input_ids'].shape) + print(batch['input_ids'].shape) if __name__ == "__main__": diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 9ee5e043..43a5f8ef 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -1,2 +1,102 @@ -import wandb -from accelerate import Accelerator +from typing import Callable, List, Optional, Tuple, Union, Dict +import torch +from torch import nn +import numpy as np +import evaluate +from dataclasses import dataclass +from torch.utils.data import Dataset +from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice +from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase +from rank_datasets import DataCollatorForPairRank, WebGPT +from utils import get_tokenizer, train_val_dataset + +accuracy = evaluate.load("accuracy") + +@dataclass +class CustomTrainingArguments(TrainingArguments): + loss_function: str='rank' + + +def compute_metrics(eval_pred): + predictions, _ = eval_pred + predictions = np.argmax(predictions, axis=1) + return accuracy.compute(predictions=predictions, references=[0]*predictions.shape[0]) + +class RankLoss(nn.Module): + def __init__(self, eps=1e-8) -> None: + super().__init__() + self.eps = eps + self.log_sigmoid = nn.LogSigmoid() + + def forward(self, pos, neg): + return -self.log_sigmoid(pos - neg + self.eps).mean() + + +class RankTrainer(Trainer): + def __init__(self, model: Union[PreTrainedModel, nn.Module] = None, + args: TrainingArguments = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Dataset] = None, + tokenizer: Optional[PreTrainedTokenizerBase] = None, + model_init: Callable[[], PreTrainedModel] = None, + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + callbacks: Optional[List[TrainerCallback]] = None, + optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None): + super().__init__(model, args, data_collator, train_dataset, eval_dataset, tokenizer, + model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics) + self.loss_fct = RankLoss() if args.loss_function == 'rank' else nn.CrossEntropyLoss() + self.loss_function = args.loss_function + + def compute_loss(self, model, inputs, return_outputs=False): + # forward pass + outputs = model(**inputs) + logits = outputs.get("logits").view(-1, 2) + if self.loss_function == 'rank': + loss = self.loss_fct(logits[:, 0], logits[:, 1]) + else: + loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long)) + + return (loss, outputs) if return_outputs else loss + + +if __name__ == "__main__": + model_name = 'bigscience/bloomz-560m' + model_name = 'google/electra-base-discriminator' + model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') + tokenizer = get_tokenizer(model_name) + args = CustomTrainingArguments( + output_dir=f"outputs/{model_name}-finetuned", + fp16=True, + num_train_epochs=4, + warmup_steps=500, + learning_rate=3e-5, + # half_precision_backend="apex", + gradient_checkpointing=False, + gradient_accumulation_steps=6, + per_device_train_batch_size=12, + per_device_eval_batch_size=5, + weight_decay=0.01, + max_grad_norm=2.0, + logging_steps=10, + save_total_limit=4, + evaluation_strategy='steps', + loss_function='rank', + eval_steps=500, + save_steps=1000, + report_to="wandb", + run_name='reward-model' + ) + dataset = WebGPT() + train, eval = train_val_dataset(dataset) + collate_fn = DataCollatorForPairRank(tokenizer, max_length=400) + trainer = RankTrainer( + model, + args, + train_dataset=train, + eval_dataset=eval, + data_collator=collate_fn, + tokenizer=tokenizer + ) + trainer.train() diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 1487947c..10f84193 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -1,4 +1,7 @@ import re +from torch.utils.data import Subset +from sklearn.model_selection import train_test_split +from transformers import AutoTokenizer re_reference_remove = re.compile(r'\[([0-9])+\]|\[([0-9])+,([0-9])+\]') @@ -16,3 +19,23 @@ def webgpt_return_format(row): 'pos': re_reference_remove.sub('', row['answer_1']), 'neg': re_reference_remove.sub('', row['answer_0']) } + + +def get_tokenizer(tokenizer_name): + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + if 'galactica' in tokenizer_name: + tokenizer.add_special_tokens({'pad_token':'', 'eos_token': '' }) + + return tokenizer + + + +def train_val_dataset(dataset, val_split=0.2): + train_idx, val_idx = train_test_split(list(range(len(dataset))), + test_size=val_split, random_state=666, shuffle=True) + # [3879, 11479, 8341, 9177, 10798, 18177, 5735, 15669, 4837, 2760] + print(val_idx[:10]) + # [13582, 5919, 11875, 7373, 19135, 13706, 8555, 15788, 15005, 15209] + print(train_idx[:10]) + return Subset(dataset, train_idx), Subset(dataset, val_idx) + From b2ef4695a0e0b72ff9e3d4c14ae85b9c35ec24da Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 03:47:54 +0000 Subject: [PATCH 03/53] [fix] Fix missing accuracy and eval loss --- model/reward/instructor/trainer.py | 43 +++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 43a5f8ef..45ee76c6 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -1,4 +1,6 @@ -from typing import Callable, List, Optional, Tuple, Union, Dict +import os +os.environ['WANDB_PROJECT'] = 'reward-model' +from typing import Any, Callable, List, Optional, Tuple, Union, Dict import torch from torch import nn import numpy as np @@ -60,6 +62,29 @@ class RankTrainer(Trainer): return (loss, outputs) if return_outputs else loss + def _compute_loss(self, model, inputs): + inputs = self._prepare_inputs(inputs) + outputs = model(**inputs) + logits = outputs.get("logits").view(-1, 2) + if self.loss_function == 'rank': + loss = self.loss_fct(logits[:, 0], logits[:, 1]) + else: + loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long)) + + return loss, logits + + def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: + + with torch.no_grad(): + # compute loss on predict data + loss, logits = self._compute_loss(model, inputs) + + loss = loss.mean().detach() + labels = torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long) + if self.args.prediction_loss_only: + return (loss, None, None) + + return (loss, logits, labels) if __name__ == "__main__": model_name = 'bigscience/bloomz-560m' @@ -67,26 +92,25 @@ if __name__ == "__main__": model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') tokenizer = get_tokenizer(model_name) args = CustomTrainingArguments( - output_dir=f"outputs/{model_name}-finetuned", - fp16=True, + output_dir=f"{model_name}-finetuned", num_train_epochs=4, warmup_steps=500, + loss_function='rank', learning_rate=3e-5, # half_precision_backend="apex", + fp16=True, gradient_checkpointing=False, - gradient_accumulation_steps=6, - per_device_train_batch_size=12, + gradient_accumulation_steps=5, + per_device_train_batch_size=16, per_device_eval_batch_size=5, weight_decay=0.01, max_grad_norm=2.0, logging_steps=10, save_total_limit=4, evaluation_strategy='steps', - loss_function='rank', eval_steps=500, save_steps=1000, - report_to="wandb", - run_name='reward-model' + report_to='wandb' ) dataset = WebGPT() train, eval = train_val_dataset(dataset) @@ -97,6 +121,7 @@ if __name__ == "__main__": train_dataset=train, eval_dataset=eval, data_collator=collate_fn, - tokenizer=tokenizer + tokenizer=tokenizer, + compute_metrics=compute_metrics ) trainer.train() From 3a10f1024ab16a00acb42b400ac5195a0aec07b5 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 09:27:09 +0000 Subject: [PATCH 04/53] [fix] Fix truncation in collate fn --- model/reward/instructor/rank_datasets.py | 11 +++++++---- model/reward/instructor/trainer.py | 15 ++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index e407b30f..128baafe 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -38,8 +38,10 @@ class DataCollatorForPairRank: batch_size = 0 for question, pairs in features: for (pos, neg) in pairs: - flatten_features.append(self.tokenizer(question, pos, truncation=True)) - flatten_features.append(self.tokenizer(question, neg, truncation=True)) + flatten_features.append(self.tokenizer(question, pos, + truncation=True, max_length=self.max_length)) + flatten_features.append(self.tokenizer(question, neg, + truncation=True, max_length=self.max_length)) batch_size += 1 batch = self.tokenizer.pad( @@ -147,6 +149,8 @@ class HFSummary(Dataset): self.summaries = summaries + self.postfix_prompt = ' TLDR;' + def __len__(self): return len(self.index2summary) @@ -159,6 +163,5 @@ class HFSummary(Dataset): # not optimal but good for now valid_idx = np.random.choice(len(rows), self.max_comparison_per_sample) # optimize the format later - return context, [ r for idx, r in enumerate(rows) if idx in valid_idx ] - + return context+self.postfix_prompt, [ r for idx, r in enumerate(rows) if idx in valid_idx ] diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 45ee76c6..586c8d47 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -6,10 +6,10 @@ from torch import nn import numpy as np import evaluate from dataclasses import dataclass -from torch.utils.data import Dataset +from torch.utils.data import Dataset, ConcatDataset from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase -from rank_datasets import DataCollatorForPairRank, WebGPT +from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary from utils import get_tokenizer, train_val_dataset accuracy = evaluate.load("accuracy") @@ -88,7 +88,7 @@ class RankTrainer(Trainer): if __name__ == "__main__": model_name = 'bigscience/bloomz-560m' - model_name = 'google/electra-base-discriminator' + model_name = 'google/electra-large-discriminator' model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') tokenizer = get_tokenizer(model_name) args = CustomTrainingArguments( @@ -99,9 +99,9 @@ if __name__ == "__main__": learning_rate=3e-5, # half_precision_backend="apex", fp16=True, - gradient_checkpointing=False, - gradient_accumulation_steps=5, - per_device_train_batch_size=16, + gradient_checkpointing=True, + gradient_accumulation_steps=8, + per_device_train_batch_size=8, per_device_eval_batch_size=5, weight_decay=0.01, max_grad_norm=2.0, @@ -114,7 +114,8 @@ if __name__ == "__main__": ) dataset = WebGPT() train, eval = train_val_dataset(dataset) - collate_fn = DataCollatorForPairRank(tokenizer, max_length=400) + train = ConcatDataset([train, HFSummary()]) + collate_fn = DataCollatorForPairRank(tokenizer, max_length=440) trainer = RankTrainer( model, args, From d2572d032301cff6c4304fd54952d2f49fe1eecd Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 09:42:49 +0000 Subject: [PATCH 05/53] [fix] Add drop_token_type to use galactica --- model/reward/instructor/rank_datasets.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 128baafe..41740dcf 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -31,6 +31,7 @@ class DataCollatorForPairRank: padding: Union[bool, str, PaddingStrategy] = True max_length: Optional[int] = None pad_to_multiple_of: Optional[int] = None + drop_token_type: bool = False def __call__(self, features): @@ -51,6 +52,8 @@ class DataCollatorForPairRank: pad_to_multiple_of=self.pad_to_multiple_of, return_tensors="pt", ) + if self.drop_token_type: + batch.pop('token_type_ids') # batch = {k: v.view(batch_size, self.num_choices, -1) for k, v in batch.items()} return batch From f3c299757d89fc6913996d852e3e8563ae61b5cf Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 17:02:46 +0000 Subject: [PATCH 06/53] [feature] added configs argument for parameters training and recording --- model/reward/instructor/README.md | 3 ++ model/reward/instructor/rank_datasets.py | 2 - model/reward/instructor/trainer.py | 52 ++++++++++++++++-------- model/reward/instructor/utils.py | 38 +++++++++++++++++ 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md index 7dbfefbc..a8b5ef33 100644 --- a/model/reward/instructor/README.md +++ b/model/reward/instructor/README.md @@ -1,5 +1,8 @@ +# Sections to train Reward Model (RM) +Currently we format + ```bash diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 41740dcf..aa77089c 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -9,13 +9,11 @@ ''' from typing import Optional, Union -import os import glob import json from dataclasses import dataclass import numpy as np from torch.utils.data import Dataset -import torch from datasets import load_dataset from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 586c8d47..06bb8098 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -1,18 +1,22 @@ import os os.environ['WANDB_PROJECT'] = 'reward-model' -from typing import Any, Callable, List, Optional, Tuple, Union, Dict import torch -from torch import nn -import numpy as np +import yaml import evaluate +from typing import Any, Callable, List, Optional, Tuple, Union, Dict +from torch import nn +from argparse import ArgumentParser +import numpy as np from dataclasses import dataclass from torch.utils.data import Dataset, ConcatDataset -from transformers import AutoModelForSequenceClassification, AutoModelForMultipleChoice +from transformers import AutoModelForSequenceClassification from transformers import Trainer, PreTrainedModel, TrainingArguments, DataCollator, EvalPrediction, TrainerCallback, PreTrainedTokenizerBase from rank_datasets import DataCollatorForPairRank, WebGPT, HFSummary -from utils import get_tokenizer, train_val_dataset +from utils import get_tokenizer, train_val_dataset, freeze_top_n_layers, argument_parsing accuracy = evaluate.load("accuracy") +parser = ArgumentParser() +parser.add_argument('config', type=str) @dataclass class CustomTrainingArguments(TrainingArguments): @@ -87,21 +91,26 @@ class RankTrainer(Trainer): return (loss, logits, labels) if __name__ == "__main__": - model_name = 'bigscience/bloomz-560m' - model_name = 'google/electra-large-discriminator' + training_conf = argument_parsing(parser) + + model_name = training_conf['model_name'] model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') + if 'freeze_layer' in training_conf: + num_layer = training_conf['freeze_layer'] + model = freeze_top_n_layers(model, num_layer) + tokenizer = get_tokenizer(model_name) args = CustomTrainingArguments( output_dir=f"{model_name}-finetuned", - num_train_epochs=4, + num_train_epochs=training_conf['num_train_epochs'], warmup_steps=500, - loss_function='rank', - learning_rate=3e-5, + loss_function=training_conf['loss'], + learning_rate=training_conf['learning_rate'], # half_precision_backend="apex", fp16=True, - gradient_checkpointing=True, - gradient_accumulation_steps=8, - per_device_train_batch_size=8, + gradient_checkpointing=training_conf['gradient_checkpointing'], + gradient_accumulation_steps=training_conf['gradient_checkpointing'], + per_device_train_batch_size=training_conf['per_device_train_batch_size'], per_device_eval_batch_size=5, weight_decay=0.01, max_grad_norm=2.0, @@ -112,10 +121,19 @@ if __name__ == "__main__": save_steps=1000, report_to='wandb' ) - dataset = WebGPT() - train, eval = train_val_dataset(dataset) - train = ConcatDataset([train, HFSummary()]) - collate_fn = DataCollatorForPairRank(tokenizer, max_length=440) + train_datasets, evals = [], {} + if 'webgpt' in training_conf['datasets']: + web_dataset = WebGPT() + train, eval = train_val_dataset(web_dataset) + train_datasets.append(train) + evals['webgpt'] = eval + if 'hfsummary' in training_conf['datasets']: + summary_dataset = HFSummary() + sum_train, sum_eval = train_val_dataset(summary_dataset) + train_datasets.append(sum_train) + evals['hfsummary'] = sum_eval + + collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length']) trainer = RankTrainer( model, args, diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 10f84193..4867087c 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -1,4 +1,5 @@ import re +import yaml from torch.utils.data import Subset from sklearn.model_selection import train_test_split from transformers import AutoTokenizer @@ -39,3 +40,40 @@ def train_val_dataset(dataset, val_split=0.2): print(train_idx[:10]) return Subset(dataset, train_idx), Subset(dataset, val_idx) +def freeze_top_n_layers(model, target_layers): + for name, param in model.name_parameters(): + if 'embed' in name: + param.requires_grad = False + elif 'layer' in name: + tokens = name.split('.') + idx = 0 + for token in tokens: + if 'layer' in token: + break + idx += 1 + + layer_ = int(tokens[idx+1]) + if layer_ < target_layers: + param.requires_grad = False + return model + + +def argument_parsing(parser): + default_params = { + 'num_train_epochs': 4, + 'learning_rate': 3e-5, + 'eval_steps': 500, + 'loss': 'rank', + 'max_length': 440, + 'per_device_train_batch_size': 8, + 'gradient_accumulation_steps': 8, + 'gradient_checkpointing': False, + 'datasets': ['webgpt'] + } + args = parser.parse_args() + with open(args.config, 'r', encoding='utf-8') as f: + training_conf = yaml.safe_load(f.read()) + + return { **default_params, **training_conf } + + From 24e06626f46e1f9a4bd4f112ac8c8af45556e866 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 17:04:44 +0000 Subject: [PATCH 07/53] [fix] Fix missing configs --- model/reward/instructor/configs/electra-base-dis-webgpt.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 model/reward/instructor/configs/electra-base-dis-webgpt.yml diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml new file mode 100644 index 00000000..5c02fab7 --- /dev/null +++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml @@ -0,0 +1,2 @@ +model_name: google/electra-base-discriminator +learning_rate: 3e-5 From 918b7b7ec0446651cb724ee0909288d6a89ce71b Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 01:25:53 +0800 Subject: [PATCH 08/53] [feature] Add galactica training config --- model/reward/instructor/configs/galactica-125m.yml | 13 +++++++++++++ model/reward/instructor/configs/galactica-1b.yml | 8 ++++++++ model/reward/instructor/trainer.py | 6 +++--- model/reward/instructor/utils.py | 8 ++++++-- 4 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 model/reward/instructor/configs/galactica-125m.yml create mode 100644 model/reward/instructor/configs/galactica-1b.yml diff --git a/model/reward/instructor/configs/galactica-125m.yml b/model/reward/instructor/configs/galactica-125m.yml new file mode 100644 index 00000000..55e093f5 --- /dev/null +++ b/model/reward/instructor/configs/galactica-125m.yml @@ -0,0 +1,13 @@ +model_name: facebook/galactica-125m +learning_rate: 1e-5 +gradient_checkpointing: false +gradient_accumulation_steps: 32 +per_device_train_batch_size: 2 +warmup_steps: 600 +eval_steps: 200 +save_steps: 500 +max_length: 512 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml new file mode 100644 index 00000000..48ad439b --- /dev/null +++ b/model/reward/instructor/configs/galactica-1b.yml @@ -0,0 +1,8 @@ +model_name: facebook/galactica-1.3b +learning_rate: 6e-6 +gradient_checkpointing: false +gradient_accumulation_steps: 16 +per_device_train_batch_size: 4 +warmup_steps: 600 +eval_steps: 200 +save_steps: 500 \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 06bb8098..dbdd91ba 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -109,7 +109,7 @@ if __name__ == "__main__": # half_precision_backend="apex", fp16=True, gradient_checkpointing=training_conf['gradient_checkpointing'], - gradient_accumulation_steps=training_conf['gradient_checkpointing'], + gradient_accumulation_steps=training_conf['gradient_accumulation_steps'], per_device_train_batch_size=training_conf['per_device_train_batch_size'], per_device_eval_batch_size=5, weight_decay=0.01, @@ -132,8 +132,8 @@ if __name__ == "__main__": sum_train, sum_eval = train_val_dataset(summary_dataset) train_datasets.append(sum_train) evals['hfsummary'] = sum_eval - - collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length']) + train = ConcatDataset(train_datasets) + collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name) trainer = RankTrainer( model, args, diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 4867087c..733e6ea7 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -74,6 +74,10 @@ def argument_parsing(parser): with open(args.config, 'r', encoding='utf-8') as f: training_conf = yaml.safe_load(f.read()) - return { **default_params, **training_conf } - + params = { **default_params, **training_conf } + params['gradient_accumulation_steps'] = int(params['gradient_accumulation_steps']) + params['num_train_epochs'] = int(params['num_train_epochs']) + params['per_device_train_batch_size'] = int(params['per_device_train_batch_size']) + params['learning_rate'] = float(params['learning_rate']) + return params From ba336fb087d10892b47133fdbee49846e6759db4 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 17:43:27 +0000 Subject: [PATCH 09/53] [fix] fix freeze top N layers --- model/reward/instructor/configs/galactica-1b.yml | 10 ++++++++-- model/reward/instructor/trainer.py | 3 +++ model/reward/instructor/utils.py | 7 ++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/model/reward/instructor/configs/galactica-1b.yml b/model/reward/instructor/configs/galactica-1b.yml index 48ad439b..5a094520 100644 --- a/model/reward/instructor/configs/galactica-1b.yml +++ b/model/reward/instructor/configs/galactica-1b.yml @@ -2,7 +2,13 @@ model_name: facebook/galactica-1.3b learning_rate: 6e-6 gradient_checkpointing: false gradient_accumulation_steps: 16 -per_device_train_batch_size: 4 +per_device_train_batch_size: 2 warmup_steps: 600 +freeze_layer: 20 eval_steps: 200 -save_steps: 500 \ No newline at end of file +save_steps: 500 +max_length: 400 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index dbdd91ba..22baf130 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -98,6 +98,9 @@ if __name__ == "__main__": if 'freeze_layer' in training_conf: num_layer = training_conf['freeze_layer'] model = freeze_top_n_layers(model, num_layer) + model_parameters = filter(lambda p: p.requires_grad, model.parameters()) + params = sum([np.prod(p.size()) for p in model_parameters]) + print('Number of trainable : {}M'.format(int(params/1e6))) tokenizer = get_tokenizer(model_name) args = CustomTrainingArguments( diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 733e6ea7..ef3ed98d 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -41,23 +41,24 @@ def train_val_dataset(dataset, val_split=0.2): return Subset(dataset, train_idx), Subset(dataset, val_idx) def freeze_top_n_layers(model, target_layers): - for name, param in model.name_parameters(): + for name, param in model.named_parameters(): if 'embed' in name: param.requires_grad = False - elif 'layer' in name: + elif '.layer' in name: tokens = name.split('.') idx = 0 for token in tokens: if 'layer' in token: break idx += 1 + if idx >= len(tokens): + continue layer_ = int(tokens[idx+1]) if layer_ < target_layers: param.requires_grad = False return model - def argument_parsing(parser): default_params = { 'num_train_epochs': 4, From c5b31d0b9e268cebd7b1f3ab8a5327541d8e6dd2 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sat, 31 Dec 2022 18:20:41 +0000 Subject: [PATCH 10/53] [feature] update reamde --- model/reward/instructor/README.md | 25 +++- model/reward/instructor/requirements.txt | 140 +++++++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 model/reward/instructor/requirements.txt diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md index a8b5ef33..29716dca 100644 --- a/model/reward/instructor/README.md +++ b/model/reward/instructor/README.md @@ -1,10 +1,31 @@ # Sections to train Reward Model (RM) +Trainer code based on huggingface. Should be compatible with deepspeed or accelerate -Currently we format + + +Requirements + +``` +wandb +evaluate +datasets +transformers +torch==1.12 +``` + +To train your model run this ```bash +python trainer.py configs/electra-base-dis-webgpt.yml +``` + + +## Dataset + +For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here. + + -``` \ No newline at end of file diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt new file mode 100644 index 00000000..9fc45917 --- /dev/null +++ b/model/reward/instructor/requirements.txt @@ -0,0 +1,140 @@ +aiohttp==3.8.3 +aiosignal==1.3.1 +anyio==3.6.2 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +arrow==1.2.3 +asttokens==2.2.1 +async-timeout==4.0.2 +attrs==22.2.0 +autopep8==2.0.1 +backcall==0.2.0 +beautifulsoup4==4.11.1 +bleach==5.0.1 +certifi==2022.12.7 +cffi==1.15.1 +charset-normalizer==2.1.1 +click==8.1.3 +comm==0.1.2 +datasets==2.8.0 +debugpy==1.6.4 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.6 +docker-pycreds==0.4.0 +entrypoints==0.4 +evaluate==0.4.0 +exceptiongroup==1.1.0 +executing==1.2.0 +fastjsonschema==2.16.2 +filelock==3.9.0 +fqdn==1.5.1 +frozenlist==1.3.3 +fsspec==2022.11.0 +gitdb==4.0.10 +GitPython==3.1.30 +huggingface-hub==0.11.1 +idna==3.4 +iniconfig==1.1.1 +ipykernel==6.19.4 +ipython==8.7.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.4 +isoduration==20.11.0 +jedi==0.18.2 +Jinja2==3.1.2 +joblib==1.2.0 +jsonpointer==2.3 +jsonschema==4.17.3 +jupyter==1.0.0 +jupyter-console==6.4.4 +jupyter-events==0.5.0 +jupyter_client==7.4.8 +jupyter_core==5.1.1 +jupyter_server==2.0.6 +jupyter_server_terminals==0.4.3 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.5 +lightning-utilities==0.5.0 +MarkupSafe==2.1.1 +matplotlib-inline==0.1.6 +mistune==2.0.4 +multidict==6.0.4 +multiprocess==0.70.14 +nbclassic==0.4.8 +nbclient==0.7.2 +nbconvert==7.2.7 +nbformat==5.7.1 +nest-asyncio==1.5.6 +notebook==6.5.2 +notebook_shim==0.2.2 +numpy==1.24.1 +packaging==22.0 +pandas==1.5.2 +pandocfilters==1.5.0 +parso==0.8.3 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +platformdirs==2.6.2 +pluggy==1.0.0 +prometheus-client==0.15.0 +promise==2.3 +prompt-toolkit==3.0.36 +protobuf==3.20.1 +psutil==5.9.4 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==10.0.1 +pycodestyle==2.10.0 +pycparser==2.21 +Pygments==2.13.0 +pyrsistent==0.19.3 +pytest==7.2.0 +python-dateutil==2.8.2 +python-json-logger==2.0.4 +pytorch-lightning==1.8.6 +pytz==2022.7 +PyYAML==6.0 +pyzmq==24.0.1 +qtconsole==5.4.0 +QtPy==2.3.0 +regex==2022.10.31 +requests==2.28.1 +responses==0.18.0 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +scikit-learn==1.2.0 +scipy==1.9.3 +Send2Trash==1.8.0 +sentry-sdk==1.12.1 +setproctitle==1.3.2 +shortuuid==1.0.11 +six==1.16.0 +smmap==5.0.0 +sniffio==1.3.0 +soupsieve==2.3.2.post1 +stack-data==0.6.2 +tensorboardX==2.5.1 +terminado==0.17.1 +threadpoolctl==3.1.0 +tinycss2==1.2.1 +tokenizers==0.13.2 +tomli==2.0.1 +torch==1.12.1+cu116 +torchmetrics==0.11.0 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.8.0 +transformers==4.25.1 +typing_extensions==4.4.0 +uri-template==1.2.0 +urllib3==1.26.13 +wandb==0.13.7 +wcwidth==0.2.5 +webcolors==1.12 +webencodings==0.5.1 +websocket-client==1.4.2 +widgetsnbextension==4.0.5 +xxhash==3.2.0 +yarl==1.8.2 From 8a42ed32950cb7028bb50531281f6c736b2ac4e3 Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Sat, 31 Dec 2022 16:44:04 -0500 Subject: [PATCH 11/53] Adding a file for listing relevant research papers --- docs/research/README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 docs/research/README.md diff --git a/docs/research/README.md b/docs/research/README.md new file mode 100644 index 00000000..bf4461e7 --- /dev/null +++ b/docs/research/README.md @@ -0,0 +1,21 @@ +# Research +This page lists research papers that are relevant to the project. + +## Automatically Generating Instruction Data for Training +This line of work is about significantly reducing the need for manually annotated data for the purpose of training [instruction-aligned](https://openai.com/blog/instruction-following/) language models. +### SELF-INSTRUCT: Aligning Language Model with Self Generated Instructions [[ArXiv](https://arxiv.org/pdf/2212.10560.pdf)], [[Github](https://github.com/yizhongw/self-instruct)]. + +> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations. +> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model. +> Applying our method to vanilla GPT3, we demonstrate a 33% absolute improvement over the original model on SuperNaturalInstructions, on par with the performance of InstructGPT-0011, which is trained with private user data and human annotations. + + +### Tuning Language Models with (Almost) No Human Labor. [[ArXiv](https://arxiv.org/pdf/2212.09689.pdf)], [[Github](https://github.com/orhonovich/unnatural-instructions)]. + +> In this work, we introduce +Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor. +> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth. +> This set is then expanded by prompting the model to rephrase each instruction, creating a total of approximately 240,000 examples of instructions, inputs, and outputs. +> Experiments show that despite containing a fair amount of noise, training on Unnatural Instructions rivals the effectiveness of training +on open-source manually-curated datasets, surpassing the performance of models such as +T0++ and Tk-Instruct across various benchmarks. \ No newline at end of file From 35f4c2f0d900e5bb1bada6a8748b89f3a2b36367 Mon Sep 17 00:00:00 2001 From: mrcabbage972 Date: Sat, 31 Dec 2022 19:28:40 -0500 Subject: [PATCH 12/53] Adding missing line break --- docs/research/README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/research/README.md b/docs/research/README.md index bf4461e7..498a858b 100644 --- a/docs/research/README.md +++ b/docs/research/README.md @@ -1,21 +1,23 @@ # Research + This page lists research papers that are relevant to the project. ## Automatically Generating Instruction Data for Training + This line of work is about significantly reducing the need for manually annotated data for the purpose of training [instruction-aligned](https://openai.com/blog/instruction-following/) language models. + ### SELF-INSTRUCT: Aligning Language Model with Self Generated Instructions [[ArXiv](https://arxiv.org/pdf/2212.10560.pdf)], [[Github](https://github.com/yizhongw/self-instruct)]. -> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations. -> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model. +> We introduce SELF-INSTRUCT, a framework for improving the instruction-following capabilities of pretrained language models by bootstrapping off its own generations. +> Our pipeline generates instruction, input, and output samples from a language model, then prunes them before using them to finetune the original model. > Applying our method to vanilla GPT3, we demonstrate a 33% absolute improvement over the original model on SuperNaturalInstructions, on par with the performance of InstructGPT-0011, which is trained with private user data and human annotations. - ### Tuning Language Models with (Almost) No Human Labor. [[ArXiv](https://arxiv.org/pdf/2212.09689.pdf)], [[Github](https://github.com/orhonovich/unnatural-instructions)]. > In this work, we introduce -Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor. -> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth. +> Unnatural Instructions: a large dataset of creative and diverse instructions, collected with virtually no human labor. +> We collect 64,000 examples by prompting a language model with three seed examples of instructions and eliciting a fourth. > This set is then expanded by prompting the model to rephrase each instruction, creating a total of approximately 240,000 examples of instructions, inputs, and outputs. > Experiments show that despite containing a fair amount of noise, training on Unnatural Instructions rivals the effectiveness of training -on open-source manually-curated datasets, surpassing the performance of models such as -T0++ and Tk-Instruct across various benchmarks. \ No newline at end of file +> on open-source manually-curated datasets, surpassing the performance of models such as +> T0++ and Tk-Instruct across various benchmarks. From 0119ee666b64b7de779d440976ec367e688a1594 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 02:09:21 +0000 Subject: [PATCH 13/53] [feature] Add support for bloomz --- model/reward/instructor/README.md | 15 ++++++++++++--- model/reward/instructor/configs/bloomz-560m.yml | 10 ++++++++++ .../configs/electra-base-dis-webgpt.yml | 3 ++- model/reward/instructor/rank_datasets.py | 8 +++++++- model/reward/instructor/utils.py | 15 +++++++++++++-- 5 files changed, 44 insertions(+), 7 deletions(-) create mode 100644 model/reward/instructor/configs/bloomz-560m.yml diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md index 29716dca..5992dbc0 100644 --- a/model/reward/instructor/README.md +++ b/model/reward/instructor/README.md @@ -1,7 +1,6 @@ # Sections to train Reward Model (RM) -Trainer code based on huggingface. Should be compatible with deepspeed or accelerate - +Trainer code based on huggingface. Compatible with deepspeed or accelerate Requirements @@ -14,7 +13,7 @@ transformers torch==1.12 ``` -To train your model run this +Start training ```bash @@ -26,6 +25,16 @@ python trainer.py configs/electra-base-dis-webgpt.yml For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here. +## Model +Check out configs +``` +Open-Assistant/model/reward/instructor/configs/ + bloomz-560m.yml + electra-base-dis-webgpt.yml + galactica-125m.yml + galactica-1b.yml +``` +You can add new huggingface model as you want. diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml new file mode 100644 index 00000000..c8f55746 --- /dev/null +++ b/model/reward/instructor/configs/bloomz-560m.yml @@ -0,0 +1,10 @@ +model_name: bigscience/bloomz-560m +learning_rate: 3e-5 +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +max_length: 600 +freeze_layer: 12 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml index 5c02fab7..fc168b63 100644 --- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml +++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml @@ -1,2 +1,3 @@ -model_name: google/electra-base-discriminator +model_name: google/electra-large-discriminator learning_rate: 3e-5 +max_length: 300 \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index aa77089c..3d122915 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -1,6 +1,12 @@ ''' author: theblackcat102 + Dataset output format from __getitem__ + + - question / prompt : string + + - answers / rows : list of tuple pair. The first element in the tuple pair must be the positive pair (rank higher than the second element) + A list of rank based dataset for training using rank loss Some nice features to have @@ -105,7 +111,7 @@ class HFSummary(Dataset): >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive - choice : 0 or 1 + labeling method : pair comparison, 0 or 1 ''' def __init__(self, split='train', diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index ef3ed98d..f26add55 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -41,14 +41,16 @@ def train_val_dataset(dataset, val_split=0.2): return Subset(dataset, train_idx), Subset(dataset, val_idx) def freeze_top_n_layers(model, target_layers): + # its possible we can simply detect which module is a ModuleList + # and simply freeze the module without doing string parsing for name, param in model.named_parameters(): if 'embed' in name: param.requires_grad = False - elif '.layer' in name: + elif '.layer' in name or '.h.' in name: tokens = name.split('.') idx = 0 for token in tokens: - if 'layer' in token: + if 'layer' in token or token == 'h': break idx += 1 if idx >= len(tokens): @@ -56,6 +58,7 @@ def freeze_top_n_layers(model, target_layers): layer_ = int(tokens[idx+1]) if layer_ < target_layers: + # print('freeze ', layer_, name) param.requires_grad = False return model @@ -82,3 +85,11 @@ def argument_parsing(parser): params['learning_rate'] = float(params['learning_rate']) return params + + +if __name__ == "__main__": + from transformers import AutoModelForSequenceClassification + + model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m') + freeze_top_n_layers(model, 10) + print(model.state_dict().keys()) \ No newline at end of file From e27a3eb3c75e6b3193e712e3cfd76298e0dc6bc6 Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 02:22:57 +0000 Subject: [PATCH 14/53] [fix] Tidy up todo and trainer comments --- model/reward/instructor/TODO.md | 13 ++++++++++++- .../instructor/configs/bloomz-560m-summary.yml | 9 +++++++++ model/reward/instructor/trainer.py | 2 +- model/utils.py | 4 ---- 4 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 model/reward/instructor/configs/bloomz-560m-summary.yml delete mode 100644 model/utils.py diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md index 33bc6595..ec23b7c3 100644 --- a/model/reward/instructor/TODO.md +++ b/model/reward/instructor/TODO.md @@ -1,12 +1,23 @@ Some other reward features we can use +0. Finish classifcation feature -Summaries from human feedback +1. Summaries from human feedback * use `confidence` score into the RM learning, ensure the output rank score correlates with confidence * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use +* Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an addition model (rank additional aspect of the policy model) + + * this should be placed under experimental_dataset.py + + +2. Add support for anthropic dataset + +* anthropic dataset is more like a conversation tree which is much complex than simply question-answer schema + + * this is basically a MCTS from alphazero. diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml new file mode 100644 index 00000000..a02f4e4a --- /dev/null +++ b/model/reward/instructor/configs/bloomz-560m-summary.yml @@ -0,0 +1,9 @@ +model_name: bigscience/bloomz-560m +learning_rate: 3e-5 +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +max_length: 600 +freeze_layer: 12 +num_train_epochs: 2 +datasets: + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 22baf130..de0b011a 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -92,7 +92,7 @@ class RankTrainer(Trainer): if __name__ == "__main__": training_conf = argument_parsing(parser) - + model_name = training_conf['model_name'] model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') if 'freeze_layer' in training_conf: diff --git a/model/utils.py b/model/utils.py deleted file mode 100644 index 579b3f6e..00000000 --- a/model/utils.py +++ /dev/null @@ -1,4 +0,0 @@ -from transformers import AutoTokenizer - - -def update_galactica_tokenizer(): \ No newline at end of file From a5a2625e2d15f327d89ee89708284971ba96e59f Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 02:55:54 +0000 Subject: [PATCH 15/53] [merge] most of the bugs should be fixed. #77 --- model/reward/instructor/cls_dataset.py | 15 ++++----------- .../test-galactica-125m-classification.yml | 14 ++++++++++++++ model/reward/instructor/rank_datasets.py | 8 ++++++-- model/reward/instructor/trainer.py | 5 ++++- 4 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 model/reward/instructor/configs/test-galactica-125m-classification.yml diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py index 54bbd19e..ff824d19 100644 --- a/model/reward/instructor/cls_dataset.py +++ b/model/reward/instructor/cls_dataset.py @@ -24,20 +24,10 @@ class WebGPTDataset(Dataset): ''' os.makedirs('dataset', exist_ok=True) dataset = load_dataset("openai/webgpt_comparisons") - if os.path.exists(index_cache): - train_idx = torch.load(index_cache) - else: - train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False) - torch.save(set(train_idx.tolist()), index_cache) self.dataset = [] self.dataset_index = [] for idx, row in enumerate(dataset['train']): - if mode == 'train' and idx in train_idx: - self.dataset.append(webgpt_return_format(row)) - self.dataset_index.append(idx) - elif idx not in train_idx and mode != 'train': - self.dataset.append(webgpt_return_format(row)) - self.dataset_index.append(idx) + self.dataset.append(webgpt_return_format(row)) # since this dataset was generated from 176B GPT-3 # we needed some more sample generated from the starting model @@ -71,3 +61,6 @@ class WebGPTDataset(Dataset): gen_neg = random.choice(self.additional[self.dataset_index[index]]) return row['question'], row['pos'], row['neg'], gen_neg + + + diff --git a/model/reward/instructor/configs/test-galactica-125m-classification.yml b/model/reward/instructor/configs/test-galactica-125m-classification.yml new file mode 100644 index 00000000..1ad1f47c --- /dev/null +++ b/model/reward/instructor/configs/test-galactica-125m-classification.yml @@ -0,0 +1,14 @@ +model_name: facebook/galactica-125m +learning_rate: 1e-5 +gradient_checkpointing: false +gradient_accumulation_steps: 10 +per_device_train_batch_size: 6 +warmup_steps: 600 +loss: cls +eval_steps: 200 +save_steps: 500 +max_length: 128 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 3d122915..4ba6293c 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -11,7 +11,11 @@ Some nice features to have - [ ] + [] support additional negative samples generated from other models. + + For example we can use galactica-125m to generate a TLDR and assume it was + inferior than the human perference one + ''' from typing import Optional, Union @@ -35,7 +39,7 @@ class DataCollatorForPairRank: padding: Union[bool, str, PaddingStrategy] = True max_length: Optional[int] = None pad_to_multiple_of: Optional[int] = None - drop_token_type: bool = False + drop_token_type: bool = False # galactica def __call__(self, features): diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index de0b011a..48fc4e8d 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -77,7 +77,10 @@ class RankTrainer(Trainer): return loss, logits - def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: + def prediction_step(self, model: nn.Module, + inputs: Dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: with torch.no_grad(): # compute loss on predict data From 4b7f1f25a138e614ab9f385f08913878a8a21bbb Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 03:07:40 +0000 Subject: [PATCH 16/53] [fix] Use official split for eval --- model/reward/instructor/trainer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 48fc4e8d..391464c6 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -134,12 +134,14 @@ if __name__ == "__main__": train_datasets.append(train) evals['webgpt'] = eval if 'hfsummary' in training_conf['datasets']: - summary_dataset = HFSummary() - sum_train, sum_eval = train_val_dataset(summary_dataset) + sum_train = HFSummary(split='train') train_datasets.append(sum_train) + sum_eval = HFSummary(split='valid1') + assert len(sum_eval) > 0 evals['hfsummary'] = sum_eval train = ConcatDataset(train_datasets) collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf['max_length'], drop_token_type= 'galactica' in model_name) + assert len(evals) > 0 trainer = RankTrainer( model, args, From d7c049560566ef1fcd2bf9ed4f4543e95f24fa95 Mon Sep 17 00:00:00 2001 From: Keith Stevens Date: Sun, 1 Jan 2023 12:48:05 +0900 Subject: [PATCH 17/53] Deleting a few links that don't go to a page we're planning to build out. Updating two links to go to pages that will soon exist --- website/src/components/Footer.tsx | 23 ++++++++++------------- website/src/components/Header/Header.tsx | 7 ------- website/src/pages/index.tsx | 1 - 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/website/src/components/Footer.tsx b/website/src/components/Footer.tsx index a07ba24a..5c774398 100644 --- a/website/src/components/Footer.tsx +++ b/website/src/components/Footer.tsx @@ -20,24 +20,21 @@ export function Footer() {