mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-07-02 17:00:28 +08:00
[merge] most of the bugs should be fixed. #77
This commit is contained in:
@@ -24,20 +24,10 @@ class WebGPTDataset(Dataset):
|
||||
'''
|
||||
os.makedirs('dataset', exist_ok=True)
|
||||
dataset = load_dataset("openai/webgpt_comparisons")
|
||||
if os.path.exists(index_cache):
|
||||
train_idx = torch.load(index_cache)
|
||||
else:
|
||||
train_idx = np.random.choice(range(len(dataset['train'])), int(len(dataset['train'])*0.8), replace=False)
|
||||
torch.save(set(train_idx.tolist()), index_cache)
|
||||
self.dataset = []
|
||||
self.dataset_index = []
|
||||
for idx, row in enumerate(dataset['train']):
|
||||
if mode == 'train' and idx in train_idx:
|
||||
self.dataset.append(webgpt_return_format(row))
|
||||
self.dataset_index.append(idx)
|
||||
elif idx not in train_idx and mode != 'train':
|
||||
self.dataset.append(webgpt_return_format(row))
|
||||
self.dataset_index.append(idx)
|
||||
self.dataset.append(webgpt_return_format(row))
|
||||
|
||||
# since this dataset was generated from 176B GPT-3
|
||||
# we needed some more samples generated from the starting model
|
||||
@@ -71,3 +61,6 @@ class WebGPTDataset(Dataset):
|
||||
|
||||
gen_neg = random.choice(self.additional[self.dataset_index[index]])
|
||||
return row['question'], row['pos'], row['neg'], gen_neg
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
model_name: facebook/galactica-125m
|
||||
learning_rate: 1e-5
|
||||
gradient_checkpointing: false
|
||||
gradient_accumulation_steps: 10
|
||||
per_device_train_batch_size: 6
|
||||
warmup_steps: 600
|
||||
loss: cls
|
||||
eval_steps: 200
|
||||
save_steps: 500
|
||||
max_length: 128
|
||||
num_train_epochs: 2
|
||||
datasets:
|
||||
- webgpt
|
||||
- hfsummary
|
||||
@@ -11,7 +11,11 @@
|
||||
|
||||
Some nice features to have
|
||||
|
||||
[ ]
|
||||
[] support additional negative samples generated from other models.
|
||||
|
||||
For example we can use galactica-125m to generate a TLDR and assume it was
|
||||
inferior to the human-preferred one
|
||||
|
||||
|
||||
'''
|
||||
from typing import Optional, Union
|
||||
@@ -35,7 +39,7 @@ class DataCollatorForPairRank:
|
||||
padding: Union[bool, str, PaddingStrategy] = True
|
||||
max_length: Optional[int] = None
|
||||
pad_to_multiple_of: Optional[int] = None
|
||||
drop_token_type: bool = False
|
||||
drop_token_type: bool = False # galactica
|
||||
|
||||
def __call__(self, features):
|
||||
|
||||
|
||||
@@ -77,7 +77,10 @@ class RankTrainer(Trainer):
|
||||
|
||||
return loss, logits
|
||||
|
||||
def prediction_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]], prediction_loss_only: bool, ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
def prediction_step(self, model: nn.Module,
|
||||
inputs: Dict[str, Union[torch.Tensor, Any]],
|
||||
prediction_loss_only: bool,
|
||||
ignore_keys: Optional[List[str]] = None) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
|
||||
|
||||
with torch.no_grad():
|
||||
# compute loss on predict data
|
||||
|
||||
Reference in New Issue
Block a user