From 34ab948adeefe5e6479ce486c71de4992687d455 Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Sun, 1 Jan 2023 23:30:12 -0500 Subject: [PATCH 1/6] testing rankgen integration into instructor trainer --- .../instructor/configs/rankgen-t5-base.yml | 15 +++ model/reward/instructor/models.py | 22 +++++ model/reward/instructor/rank_datasets.py | 23 +++++ model/reward/instructor/requirements.txt | 3 +- model/reward/instructor/trainer.py | 91 ++++++++++++++----- model/reward/instructor/utils.py | 7 +- 6 files changed, 133 insertions(+), 28 deletions(-) create mode 100644 model/reward/instructor/configs/rankgen-t5-base.yml create mode 100644 model/reward/instructor/models.py diff --git a/model/reward/instructor/configs/rankgen-t5-base.yml b/model/reward/instructor/configs/rankgen-t5-base.yml new file mode 100644 index 00000000..7dd39777 --- /dev/null +++ b/model/reward/instructor/configs/rankgen-t5-base.yml @@ -0,0 +1,15 @@ +model_name: kalpeshk2011/rankgen-t5-base-all +tokenizer_name: google/t5-v1_1-base +learning_rate: 6e-6 +gradient_checkpointing: false +gradient_accumulation_steps: 16 +per_device_train_batch_size: 3 +warmup_steps: 600 +freeze_layer: 20 +eval_steps: 200 +save_steps: 500 +max_length: 400 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary diff --git a/model/reward/instructor/models.py b/model/reward/instructor/models.py new file mode 100644 index 00000000..699f3566 --- /dev/null +++ b/model/reward/instructor/models.py @@ -0,0 +1,22 @@ +import torch +from transformers import AutoModel + +class RankGenModel(torch.nn.Module): + def __init__(self, model_name): + super().__init__() + self.rankgen_hf_hub = model_name + assert model_name in ["kalpeshk2011/rankgen-t5-xl-all", + "kalpeshk2011/rankgen-t5-xl-pg19", + "kalpeshk2011/rankgen-t5-base-all", + "kalpeshk2011/rankgen-t5-large-all"] + self.model = AutoModel.from_pretrained(self.rankgen_hf_hub, trust_remote_code=True) + + def forward(self, prefixes, suffixes): + embedded_prefixes = self.model(**prefixes) + embedded_suffixes = self.model(**suffixes) + # take dot product of each row independently + dot_products = torch.sum(embedded_prefixes * embedded_suffixes, dim=1) + + print(f"{prefixes=}, {suffixes=}, {embedded_prefixes=}, {embedded_suffixes=}, {dot_products=}") + + return dot_products \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 99ba9955..3b995a7d 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -24,9 +24,32 @@ from typing import Optional, Union import numpy as np from datasets import load_dataset +import torch from torch.utils.data import Dataset from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase +@dataclass +class RankGenCollator(): + tokenizer: PreTrainedTokenizerBase + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + + def __call__(self, batch : list[dict[str, str]]) -> dict[str, torch.Tensor]: + prefixes = [] + better_answers = [] + worse_answers = [] + for question, pairs in batch: + for (pos, neg) in pairs: + prefixes.append("pre " + question) + better_answers.append("suffi " + pos) + worse_answers.append("suffi " + neg) + + tokenized_prefixes = self.tokenizer(prefixes, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) + tokenized_pos = self.tokenizer(better_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) + tokenized_neg = self.tokenizer(worse_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) + return {"prefix" : tokenized_prefixes, + "positive": tokenized_pos, + "negative": tokenized_neg} @dataclass class DataCollatorForPairRank: diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt index e225a2ca..eaaf36e6 100644 --- a/model/reward/instructor/requirements.txt +++ b/model/reward/instructor/requirements.txt @@ -1,6 +1,7 @@ datasets==2.8.0 evaluate==0.4.0 scikit-learn==1.2.0 -torch==1.12.1+cu116 +torch>=1.12.1 transformers==4.25.1 wandb==0.13.7 +sentencepiece==0.1.97 diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 0e98e4c5..5bb1017a 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -7,10 +7,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union import evaluate import numpy as np import torch -from rank_datasets import DataCollatorForPairRank, HFSummary, WebGPT +from rank_datasets import DataCollatorForPairRank, HFSummary, RankGenCollator, WebGPT from torch import nn from torch.utils.data import ConcatDataset, Dataset from transformers import ( + AutoModel, AutoModelForSequenceClassification, DataCollator, EvalPrediction, @@ -20,6 +21,7 @@ from transformers import ( TrainerCallback, TrainingArguments, ) +from models import RankGenModel from utils import argument_parsing, freeze_top_n_layers, get_tokenizer, train_val_dataset os.environ["WANDB_PROJECT"] = "reward-model" @@ -47,14 +49,17 @@ class RankLoss(nn.Module): self.log_sigmoid = nn.LogSigmoid() def forward(self, pos, neg): - return -self.log_sigmoid(pos - neg + self.eps).mean() + loss = -self.log_sigmoid(pos - neg + self.eps).mean() + print(f"in loss {pos=}, {neg=}, {loss=}") + return loss class RankTrainer(Trainer): def __init__( self, model: Union[PreTrainedModel, nn.Module] = None, - args: TrainingArguments = None, + model_name: str = None, + args: Optional[TrainingArguments] = None, data_collator: Optional[DataCollator] = None, train_dataset: Optional[Dataset] = None, eval_dataset: Optional[Dataset] = None, @@ -80,15 +85,26 @@ class RankTrainer(Trainer): ) self.loss_fct = RankLoss() if args.loss_function == "rank" else nn.CrossEntropyLoss() self.loss_function = args.loss_function + self.model_name = model_name def compute_loss(self, model, inputs, return_outputs=False): # forward pass - outputs = model(**inputs) - logits = outputs.get("logits").view(-1, 2) - if self.loss_function == "rank": - loss = self.loss_fct(logits[:, 0], logits[:, 1]) + if "rankgen" in self.model_name: + print(f"{inputs=}") + positive_outputs = model(inputs["prefix"], inputs["positive"]) + negative_outputs = model(inputs["prefix"], inputs["negative"]) + if self.loss_function == "rank": + loss = self.loss_fct(positive_outputs, negative_outputs) + else: + raise NotImplementedError("Only ranking loss has been implemented for rankgen model") + outputs = torch.hstack((positive_outputs, negative_outputs)) #logits else: - loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long)) + outputs = model(**inputs) + logits = outputs.get("logits").view(-1, 2) + if self.loss_function == "rank": + loss = self.loss_fct(logits[:, 0], logits[:, 1]) + else: + loss = self.loss_fct(logits, torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long)) return (loss, outputs) if return_outputs else loss @@ -110,32 +126,44 @@ class RankTrainer(Trainer): prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None, ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: + with torch.inference_mode(): + if "rankgen" in self.model_name: + inputs = self._prepare_inputs(inputs) + positive_outputs = model(inputs["prefix"], inputs["positive"]) + negative_outputs = model(inputs["prefix"], inputs["negative"]) + if self.loss_function == "rank": + loss = self.loss_fct(positive_outputs, negative_outputs) + else: + raise NotImplementedError("Only ranking loss has been implemented for rankgen model") + outputs = torch.hstack((positive_outputs, negative_outputs)) # logits + return (loss, outputs, None) + else: + # compute loss on predict data + loss, logits = self._compute_loss(model, inputs) - with torch.no_grad(): - # compute loss on predict data - loss, logits = self._compute_loss(model, inputs) + loss = loss.mean().detach() + labels = torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long) + if self.args.prediction_loss_only: + return (loss, None, None) - loss = loss.mean().detach() - labels = torch.zeros(logits.shape[0], device=logits.device, dtype=torch.long) - if self.args.prediction_loss_only: - return (loss, None, None) - - return (loss, logits, labels) + return (loss, logits, labels) if __name__ == "__main__": training_conf = argument_parsing(parser) model_name = training_conf["model_name"] - model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type="regression") + if "rankgen-t5" in model_name: + model = RankGenModel(model_name) + else: + model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type="regression") if "freeze_layer" in training_conf: num_layer = training_conf["freeze_layer"] model = freeze_top_n_layers(model, num_layer) model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print("Number of trainable : {}M".format(int(params / 1e6))) - - tokenizer = get_tokenizer(model_name) + args = CustomTrainingArguments( output_dir=f"{model_name}-finetuned", num_train_epochs=training_conf["num_train_epochs"], @@ -170,17 +198,30 @@ if __name__ == "__main__": assert len(sum_eval) > 0 evals["hfsummary"] = sum_eval train = ConcatDataset(train_datasets) - collate_fn = DataCollatorForPairRank( - tokenizer, max_length=training_conf["max_length"], drop_token_type="galactica" in model_name - ) + + if "tokenizer_name" in training_conf: + tokenizer=get_tokenizer(training_conf["tokenizer_name"]) + else: + tokenizer = get_tokenizer(model_name) + + if "rankgen" in model_name: + collate_fn = RankGenCollator( + tokenizer, max_length=training_conf["max_length"] + ) + else: + collate_fn = DataCollatorForPairRank( + tokenizer, max_length=training_conf["max_length"] + ) assert len(evals) > 0 trainer = RankTrainer( - model, - args, + model=model, + model_name=model_name, + args=args, train_dataset=train, eval_dataset=eval, data_collator=collate_fn, tokenizer=tokenizer, compute_metrics=compute_metrics, ) + # trainer.evaluate() trainer.train() diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 9441ddb9..59165598 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -4,7 +4,7 @@ import re import yaml from sklearn.model_selection import train_test_split from torch.utils.data import Subset -from transformers import AutoTokenizer +from transformers import AutoTokenizer, T5Tokenizer re_reference_remove = re.compile(r"\[([0-9])+\]|\[([0-9])+,([0-9])+\]") @@ -26,7 +26,10 @@ def webgpt_return_format(row): def get_tokenizer(tokenizer_name): - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + if "t5" in tokenizer_name: #rankgen + tokenizer = T5Tokenizer.from_pretrained(tokenizer_name, truncation_side="left") + else: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) if "galactica" in tokenizer_name: tokenizer.add_special_tokens({"pad_token": "", "eos_token": ""}) From 568a42066a80198f197fb0ac42c24af3cb334795 Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Tue, 3 Jan 2023 00:53:07 -0500 Subject: [PATCH 2/6] FP32 Training Works --- model/reward/instructor/configs/rankgen-t5-base.yml | 3 ++- model/reward/instructor/models.py | 6 ++++-- model/reward/instructor/rank_datasets.py | 1 + model/reward/instructor/trainer.py | 4 +--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/model/reward/instructor/configs/rankgen-t5-base.yml b/model/reward/instructor/configs/rankgen-t5-base.yml index 7dd39777..6776ad47 100644 --- a/model/reward/instructor/configs/rankgen-t5-base.yml +++ b/model/reward/instructor/configs/rankgen-t5-base.yml @@ -2,8 +2,9 @@ model_name: kalpeshk2011/rankgen-t5-base-all tokenizer_name: google/t5-v1_1-base learning_rate: 6e-6 gradient_checkpointing: false +fp16: false gradient_accumulation_steps: 16 -per_device_train_batch_size: 3 +per_device_train_batch_size: 2 warmup_steps: 600 freeze_layer: 20 eval_steps: 200 diff --git a/model/reward/instructor/models.py b/model/reward/instructor/models.py index 699f3566..dc7692bf 100644 --- a/model/reward/instructor/models.py +++ b/model/reward/instructor/models.py @@ -12,11 +12,13 @@ class RankGenModel(torch.nn.Module): self.model = AutoModel.from_pretrained(self.rankgen_hf_hub, trust_remote_code=True) def forward(self, prefixes, suffixes): + # print(list(self.model.parameters())) + # raise Exception("stop") embedded_prefixes = self.model(**prefixes) embedded_suffixes = self.model(**suffixes) # take dot product of each row independently dot_products = torch.sum(embedded_prefixes * embedded_suffixes, dim=1) - print(f"{prefixes=}, {suffixes=}, {embedded_prefixes=}, {embedded_suffixes=}, {dot_products=}") - + # print(f"{embedded_prefixes.shape=}, {embedded_suffixes.shape=}, {prefixes['input_ids'].shape=}, {suffixes['input_ids'].shape=}, {embedded_prefixes=}, {embedded_suffixes=}, {dot_products=}") + # raise Exception("stop") return dot_products \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 3b995a7d..965893ce 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -33,6 +33,7 @@ class RankGenCollator(): tokenizer: PreTrainedTokenizerBase padding: Union[bool, str, PaddingStrategy] = True max_length: Optional[int] = None + max_examples: Optional[int] = None def __call__(self, batch : list[dict[str, str]]) -> dict[str, torch.Tensor]: prefixes = [] diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 5bb1017a..c6f58f66 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -50,7 +50,6 @@ class RankLoss(nn.Module): def forward(self, pos, neg): loss = -self.log_sigmoid(pos - neg + self.eps).mean() - print(f"in loss {pos=}, {neg=}, {loss=}") return loss @@ -90,7 +89,6 @@ class RankTrainer(Trainer): def compute_loss(self, model, inputs, return_outputs=False): # forward pass if "rankgen" in self.model_name: - print(f"{inputs=}") positive_outputs = model(inputs["prefix"], inputs["positive"]) negative_outputs = model(inputs["prefix"], inputs["negative"]) if self.loss_function == "rank": @@ -171,7 +169,7 @@ if __name__ == "__main__": loss_function=training_conf["loss"], learning_rate=training_conf["learning_rate"], # half_precision_backend="apex", - fp16=True, + fp16=training_conf["fp16"] if "fp16" in training_conf else True, gradient_checkpointing=training_conf["gradient_checkpointing"], gradient_accumulation_steps=training_conf["gradient_accumulation_steps"], per_device_train_batch_size=training_conf["per_device_train_batch_size"], From 45c147362e01e755cce1dc229f56c75cead1aedd Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Tue, 3 Jan 2023 01:41:45 -0500 Subject: [PATCH 3/6] added precommit hooks and cleaned up configs for rankgen --- .../instructor/configs/rankgen-t5-base-fp16.yml | 16 ++++++++++++++++ .../instructor/configs/rankgen-t5-base.yml | 3 +++ 2 files changed, 19 insertions(+) create mode 100644 model/reward/instructor/configs/rankgen-t5-base-fp16.yml diff --git a/model/reward/instructor/configs/rankgen-t5-base-fp16.yml b/model/reward/instructor/configs/rankgen-t5-base-fp16.yml new file mode 100644 index 00000000..c6f2a5e0 --- /dev/null +++ b/model/reward/instructor/configs/rankgen-t5-base-fp16.yml @@ -0,0 +1,16 @@ +model_name: kalpeshk2011/rankgen-t5-base-all +tokenizer_name: google/t5-v1_1-base +learning_rate: 6e-6 +gradient_checkpointing: false +fp16: true +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +warmup_steps: 600 +freeze_layer: 20 +eval_steps: 200 +save_steps: 500 +max_length: 400 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary diff --git a/model/reward/instructor/configs/rankgen-t5-base.yml b/model/reward/instructor/configs/rankgen-t5-base.yml index 6776ad47..bcb4d613 100644 --- a/model/reward/instructor/configs/rankgen-t5-base.yml +++ b/model/reward/instructor/configs/rankgen-t5-base.yml @@ -1,4 +1,7 @@ model_name: kalpeshk2011/rankgen-t5-base-all +# model_name: kalpeshk2011/rankgen-t5-xl-all +# model_name: kalpeshk2011/rankgen-t5-xl-pg19 +# model_name: kalpeshk2011/rankgen-t5-large-all tokenizer_name: google/t5-v1_1-base learning_rate: 6e-6 gradient_checkpointing: false From 4569bcf354a23b986463a9f70ee564785e423ef2 Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Tue, 3 Jan 2023 20:47:33 -0500 Subject: [PATCH 4/6] fixed linting --- model/reward/instructor/models.py | 44 +++++++++++++----------- model/reward/instructor/rank_datasets.py | 26 ++++++++------ model/reward/instructor/requirements.txt | 2 +- model/reward/instructor/trainer.py | 23 +++++-------- model/reward/instructor/utils.py | 2 +- 5 files changed, 51 insertions(+), 46 deletions(-) diff --git a/model/reward/instructor/models.py b/model/reward/instructor/models.py index dc7692bf..084cfa51 100644 --- a/model/reward/instructor/models.py +++ b/model/reward/instructor/models.py @@ -1,24 +1,28 @@ +# -*- coding: utf-8 -*- import torch from transformers import AutoModel -class RankGenModel(torch.nn.Module): - def __init__(self, model_name): - super().__init__() - self.rankgen_hf_hub = model_name - assert model_name in ["kalpeshk2011/rankgen-t5-xl-all", - "kalpeshk2011/rankgen-t5-xl-pg19", - "kalpeshk2011/rankgen-t5-base-all", - "kalpeshk2011/rankgen-t5-large-all"] - self.model = AutoModel.from_pretrained(self.rankgen_hf_hub, trust_remote_code=True) - def forward(self, prefixes, suffixes): - # print(list(self.model.parameters())) - # raise Exception("stop") - embedded_prefixes = self.model(**prefixes) - embedded_suffixes = self.model(**suffixes) - # take dot product of each row independently - dot_products = torch.sum(embedded_prefixes * embedded_suffixes, dim=1) - - # print(f"{embedded_prefixes.shape=}, {embedded_suffixes.shape=}, {prefixes['input_ids'].shape=}, {suffixes['input_ids'].shape=}, {embedded_prefixes=}, {embedded_suffixes=}, {dot_products=}") - # raise Exception("stop") - return dot_products \ No newline at end of file +class RankGenModel(torch.nn.Module): + def __init__(self, model_name): + super().__init__() + self.rankgen_hf_hub = model_name + assert model_name in [ + "kalpeshk2011/rankgen-t5-xl-all", + "kalpeshk2011/rankgen-t5-xl-pg19", + "kalpeshk2011/rankgen-t5-base-all", + "kalpeshk2011/rankgen-t5-large-all", + ] + self.model = AutoModel.from_pretrained(self.rankgen_hf_hub, trust_remote_code=True) + + def forward(self, prefixes, suffixes): + # print(list(self.model.parameters())) + # raise Exception("stop") + embedded_prefixes = self.model(**prefixes) + embedded_suffixes = self.model(**suffixes) + # take dot product of each row independently + dot_products = torch.sum(embedded_prefixes * embedded_suffixes, dim=1) + + # print(f"{embedded_prefixes.shape=}, {embedded_suffixes.shape=}, {prefixes['input_ids'].shape=}, {suffixes['input_ids'].shape=}, {embedded_prefixes=}, {embedded_suffixes=}, {dot_products=}") + # raise Exception("stop") + return dot_products diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 965893ce..a63c9e02 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -23,19 +23,20 @@ from dataclasses import dataclass from typing import Optional, Union import numpy as np -from datasets import load_dataset import torch +from datasets import load_dataset from torch.utils.data import Dataset from transformers.tokenization_utils_base import PaddingStrategy, PreTrainedTokenizerBase + @dataclass -class RankGenCollator(): +class RankGenCollator: tokenizer: PreTrainedTokenizerBase padding: Union[bool, str, PaddingStrategy] = True max_length: Optional[int] = None max_examples: Optional[int] = None - def __call__(self, batch : list[dict[str, str]]) -> dict[str, torch.Tensor]: + def __call__(self, batch: list[dict[str, str]]) -> dict[str, torch.Tensor]: prefixes = [] better_answers = [] worse_answers = [] @@ -44,13 +45,18 @@ class RankGenCollator(): prefixes.append("pre " + question) better_answers.append("suffi " + pos) worse_answers.append("suffi " + neg) - - tokenized_prefixes = self.tokenizer(prefixes, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) - tokenized_pos = self.tokenizer(better_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) - tokenized_neg = self.tokenizer(worse_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True) - return {"prefix" : tokenized_prefixes, - "positive": tokenized_pos, - "negative": tokenized_neg} + + tokenized_prefixes = self.tokenizer( + prefixes, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True + ) + tokenized_pos = self.tokenizer( + better_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True + ) + tokenized_neg = self.tokenizer( + worse_answers, return_tensors="pt", padding=self.padding, max_length=self.max_length, truncation=True + ) + return {"prefix": tokenized_prefixes, "positive": tokenized_pos, "negative": tokenized_neg} + @dataclass class DataCollatorForPairRank: diff --git a/model/reward/instructor/requirements.txt b/model/reward/instructor/requirements.txt index eaaf36e6..ca3935e4 100644 --- a/model/reward/instructor/requirements.txt +++ b/model/reward/instructor/requirements.txt @@ -1,7 +1,7 @@ datasets==2.8.0 evaluate==0.4.0 scikit-learn==1.2.0 +sentencepiece==0.1.97 torch>=1.12.1 transformers==4.25.1 wandb==0.13.7 -sentencepiece==0.1.97 diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index c6f58f66..124c28f8 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -7,11 +7,11 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union import evaluate import numpy as np import torch +from models import RankGenModel from rank_datasets import DataCollatorForPairRank, HFSummary, RankGenCollator, WebGPT from torch import nn from torch.utils.data import ConcatDataset, Dataset from transformers import ( - AutoModel, AutoModelForSequenceClassification, DataCollator, EvalPrediction, @@ -21,7 +21,6 @@ from transformers import ( TrainerCallback, TrainingArguments, ) -from models import RankGenModel from utils import argument_parsing, freeze_top_n_layers, get_tokenizer, train_val_dataset os.environ["WANDB_PROJECT"] = "reward-model" @@ -95,7 +94,7 @@ class RankTrainer(Trainer): loss = self.loss_fct(positive_outputs, negative_outputs) else: raise NotImplementedError("Only ranking loss has been implemented for rankgen model") - outputs = torch.hstack((positive_outputs, negative_outputs)) #logits + outputs = torch.hstack((positive_outputs, negative_outputs)) # logits else: outputs = model(**inputs) logits = outputs.get("logits").view(-1, 2) @@ -133,7 +132,7 @@ class RankTrainer(Trainer): loss = self.loss_fct(positive_outputs, negative_outputs) else: raise NotImplementedError("Only ranking loss has been implemented for rankgen model") - outputs = torch.hstack((positive_outputs, negative_outputs)) # logits + outputs = torch.hstack((positive_outputs, negative_outputs)) # logits return (loss, outputs, None) else: # compute loss on predict data @@ -161,7 +160,7 @@ if __name__ == "__main__": model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print("Number of trainable : {}M".format(int(params / 1e6))) - + args = CustomTrainingArguments( output_dir=f"{model_name}-finetuned", num_train_epochs=training_conf["num_train_epochs"], @@ -196,20 +195,16 @@ if __name__ == "__main__": assert len(sum_eval) > 0 evals["hfsummary"] = sum_eval train = ConcatDataset(train_datasets) - + if "tokenizer_name" in training_conf: - tokenizer=get_tokenizer(training_conf["tokenizer_name"]) + tokenizer = get_tokenizer(training_conf["tokenizer_name"]) else: tokenizer = get_tokenizer(model_name) - + if "rankgen" in model_name: - collate_fn = RankGenCollator( - tokenizer, max_length=training_conf["max_length"] - ) + collate_fn = RankGenCollator(tokenizer, max_length=training_conf["max_length"]) else: - collate_fn = DataCollatorForPairRank( - tokenizer, max_length=training_conf["max_length"] - ) + collate_fn = DataCollatorForPairRank(tokenizer, max_length=training_conf["max_length"]) assert len(evals) > 0 trainer = RankTrainer( model=model, diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 59165598..780ac9c8 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -26,7 +26,7 @@ def webgpt_return_format(row): def get_tokenizer(tokenizer_name): - if "t5" in tokenizer_name: #rankgen + if "t5" in tokenizer_name: # rankgen tokenizer = T5Tokenizer.from_pretrained(tokenizer_name, truncation_side="left") else: tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) From da79aa04a0e4d4293b1427395e4a4a5770ea577d Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Tue, 3 Jan 2023 21:45:16 -0500 Subject: [PATCH 5/6] Cleaned up default argument logic. --- model/reward/instructor/trainer.py | 9 +++------ model/reward/instructor/utils.py | 9 ++++++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 124c28f8..2eee8b8d 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -168,7 +168,7 @@ if __name__ == "__main__": loss_function=training_conf["loss"], learning_rate=training_conf["learning_rate"], # half_precision_backend="apex", - fp16=training_conf["fp16"] if "fp16" in training_conf else True, + fp16=training_conf["fp16"], gradient_checkpointing=training_conf["gradient_checkpointing"], gradient_accumulation_steps=training_conf["gradient_accumulation_steps"], per_device_train_batch_size=training_conf["per_device_train_batch_size"], @@ -180,7 +180,7 @@ if __name__ == "__main__": evaluation_strategy="steps", eval_steps=training_conf["eval_steps"], save_steps=1000, - report_to="wandb", + report_to="local", ) train_datasets, evals = [], {} if "webgpt" in training_conf["datasets"]: @@ -196,10 +196,7 @@ if __name__ == "__main__": evals["hfsummary"] = sum_eval train = ConcatDataset(train_datasets) - if "tokenizer_name" in training_conf: - tokenizer = get_tokenizer(training_conf["tokenizer_name"]) - else: - tokenizer = get_tokenizer(model_name) + tokenizer = get_tokenizer(training_conf["tokenizer_name"]) if "rankgen" in model_name: collate_fn = RankGenCollator(tokenizer, max_length=training_conf["max_length"]) diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index 780ac9c8..7946fbb2 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -71,6 +71,10 @@ def freeze_top_n_layers(model, target_layers): def argument_parsing(parser): + args = parser.parse_args() + with open(args.config, "r", encoding="utf-8") as f: + training_conf = yaml.safe_load(f.read()) + default_params = { "num_train_epochs": 4, "learning_rate": 3e-5, @@ -82,10 +86,9 @@ def argument_parsing(parser): "gradient_accumulation_steps": 8, "gradient_checkpointing": False, "datasets": ["webgpt"], + "fp16": True, + "tokenizer_name": training_conf["model_name"], } - args = parser.parse_args() - with open(args.config, "r", encoding="utf-8") as f: - training_conf = yaml.safe_load(f.read()) params = {**default_params, **training_conf} params["gradient_accumulation_steps"] = int(params["gradient_accumulation_steps"]) From 061d6219530102d8703e463e0b93b3b9c630e97d Mon Sep 17 00:00:00 2001 From: Bobak Hashemi Date: Wed, 4 Jan 2023 20:51:19 -0500 Subject: [PATCH 6/6] removed old precommit pragma requirement --- model/reward/instructor/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/model/reward/instructor/models.py b/model/reward/instructor/models.py index 084cfa51..c1891ed2 100644 --- a/model/reward/instructor/models.py +++ b/model/reward/instructor/models.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import torch from transformers import AutoModel