From a5a2625e2d15f327d89ee89708284971ba96e59f Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Sun, 1 Jan 2023 02:55:54 +0000 Subject: [PATCH] [merge] most of the bugs should be fixed. #77 --- model/reward/instructor/cls_dataset.py | 15 ++++----------- .../test-galactica-125m-classification.yml | 14 ++++++++++++++ model/reward/instructor/rank_datasets.py | 8 ++++++-- model/reward/instructor/trainer.py | 5 ++++- 4 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 model/reward/instructor/configs/test-galactica-125m-classification.yml diff --git a/model/reward/instructor/cls_dataset.py b/model/reward/instructor/cls_dataset.py index 54bbd19e..ff824d19 100644 --- a/model/reward/instructor/cls_dataset.py +++ b/model/reward/instructor/cls_dataset.py @@ -24,20 +24,10 @@ class WebGPTDataset(Dataset): ''' os.makedirs('dataset', exist_ok=True) dataset = load_dataset("openai/webgpt_comparisons") - if os.path.exists(index_cache): - train_idx = torch.load(index_cache) - else: - train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False) - torch.save(set(train_idx.tolist()), index_cache) self.dataset = [] self.dataset_index = [] for idx, row in enumerate(dataset['train']): - if mode == 'train' and idx in train_idx: - self.dataset.append(webgpt_return_format(row)) - self.dataset_index.append(idx) - elif idx not in train_idx and mode != 'train': - self.dataset.append(webgpt_return_format(row)) - self.dataset_index.append(idx) + self.dataset.append(webgpt_return_format(row)) # since this dataset was generated from 176B GPT-3 # we needed some more sample generated from the starting model @@ -71,3 +61,6 @@ class WebGPTDataset(Dataset): gen_neg = random.choice(self.additional[self.dataset_index[index]]) return row['question'], row['pos'], row['neg'], gen_neg + + + diff --git a/model/reward/instructor/configs/test-galactica-125m-classification.yml b/model/reward/instructor/configs/test-galactica-125m-classification.yml 
new file mode 100644 index 00000000..1ad1f47c --- /dev/null +++ b/model/reward/instructor/configs/test-galactica-125m-classification.yml @@ -0,0 +1,14 @@ +model_name: facebook/galactica-125m +learning_rate: 1e-5 +gradient_checkpointing: false +gradient_accumulation_steps: 10 +per_device_train_batch_size: 6 +warmup_steps: 600 +loss: cls +eval_steps: 200 +save_steps: 500 +max_length: 128 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py index 3d122915..4ba6293c 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -11,7 +11,11 @@ Some nice features to have - [ ] + [] support additional negative samples generated from other models. + + For example we can use galactica-125m to generate a TLDR and assume it was + inferior to the human preference one + ''' from typing import Optional, Union @@ -35,7 +39,7 @@ class DataCollatorForPairRank: padding: Union[bool, str, PaddingStrategy] = True max_length: Optional[int] = None pad_to_multiple_of: Optional[int] = None - drop_token_type: bool = False + drop_token_type: bool = False # galactica def __call__(self, features): diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index de0b011a..48fc4e8d 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -77,7 +77,10 @@ class RankTrainer(Trainer): return loss, logits - def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: + def prediction_step(self, model: nn.Module, + inputs: Dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], 
Optional[torch.Tensor]]: with torch.no_grad(): # compute loss on predict data