[merge] most of the bugs should be fixed. #77

2026-07-02 17:00:28 +08:00 · 2023-01-01 02:55:54 +00:00
parent e27a3eb3c7
commit a5a2625e2d
4 changed files with 28 additions and 14 deletions
@@ -24,20 +24,10 @@ class WebGPTDataset(Dataset):
        '''
        os.makedirs('dataset', exist_ok=True)
        dataset = load_dataset("openai/webgpt_comparisons")
-        if os.path.exists(index_cache):
-            train_idx = torch.load(index_cache)
-        else:
-            train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False)
-            torch.save(set(train_idx.tolist()), index_cache)
        self.dataset = []
        self.dataset_index = []
        for idx, row in enumerate(dataset['train']):
-            if mode == 'train' and idx in train_idx:
-                self.dataset.append(webgpt_return_format(row))
-                self.dataset_index.append(idx)
-            elif idx not in train_idx and mode != 'train':
-                self.dataset.append(webgpt_return_format(row))
-                self.dataset_index.append(idx)
+            self.dataset.append(webgpt_return_format(row))

        # since this dataset was generated from 176B GPT-3
        # we needed some more sample generated from the starting model
@@ -71,3 +61,6 @@ class WebGPTDataset(Dataset):

        gen_neg = random.choice(self.additional[self.dataset_index[index]])
        return row['question'], row['pos'], row['neg'], gen_neg
+
+
+
@@ -0,0 +1,14 @@
+model_name: facebook/galactica-125m
+learning_rate: 1e-5
+gradient_checkpointing: false
+gradient_accumulation_steps: 10
+per_device_train_batch_size: 6
+warmup_steps: 600
+loss: cls
+eval_steps: 200
+save_steps: 500
+max_length: 128
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
@@ -11,7 +11,11 @@

    Some nice features to have

-    [ ] 
+    [ ] support additional negative samples generated from other models.
+
+        For example, we can use galactica-125m to generate a TLDR and assume it is
+        inferior to the human-preferred one.
+

 '''
 from typing import Optional, Union
@@ -35,7 +39,7 @@ class DataCollatorForPairRank:
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
-    drop_token_type: bool = False
+    drop_token_type: bool = False # galactica

    def __call__(self, features):

@@ -77,7 +77,10 @@ class RankTrainer(Trainer):

        return loss, logits

-    def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+    def prediction_step(self, model: nn.Module,
+            inputs: Dict[str, Union[torch.Tensor, Any]],
+            prediction_loss_only: bool,
+            ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:

        with torch.no_grad():
            # compute loss on predict data