mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
[feature] add pythia tokenizer support and limit translation pairs
This commit is contained in:
@@ -100,6 +100,8 @@ class WMT2019(TranslationPair):
|
||||
else: # translating in reverse direction
|
||||
source = random.choice(TRANSLATION_PROMPT[src]).format(row[tgt])
|
||||
self.pairs.append((source, row[src]))
|
||||
if len(self.pairs) > 100000:
|
||||
break
|
||||
|
||||
|
||||
class DiveMT(TranslationPair):
|
||||
|
||||
@@ -25,6 +25,10 @@ def get_tokenizer(conf):
|
||||
tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token, "sep_token": "<|extratoken_100|>"})
|
||||
elif "codegen" in conf.model_name:
|
||||
tokenizer.add_special_tokens({"pad_token": "<|endoftext|>", "sep_token": "<|endoftext|>"})
|
||||
elif "pythia" in conf.model_name:
|
||||
tokenizer.add_special_tokens(
|
||||
{"pad_token": "<|padding|>", "sep_token": "<|endoftext|>", "eos_token": "<|endoftext|>"}
|
||||
)
|
||||
|
||||
additional_special_tokens = (
|
||||
[]
|
||||
|
||||
Reference in New Issue
Block a user