From af1c62cd83da66342e20937dedd7ae3839f343bf Mon Sep 17 00:00:00 2001 From: theblackcat102 Date: Tue, 7 Feb 2023 01:23:54 +0000 Subject: [PATCH] [feature] Add missing hindi and spanish prompt for translation --- model/supervised_finetuning/custom_datasets/translation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/model/supervised_finetuning/custom_datasets/translation.py b/model/supervised_finetuning/custom_datasets/translation.py index 18cb9a09..cece0b43 100644 --- a/model/supervised_finetuning/custom_datasets/translation.py +++ b/model/supervised_finetuning/custom_datasets/translation.py @@ -62,7 +62,8 @@ TRANSLATION_PROMPT = { "{} how do we write in Malay", "{} give me the malay translation", "{} , berikan saya terjemahan dalam bahasa melayu", - "{}, Jemahan di bahasa melayu" "{}, jemahkan ayat ini kepada bahasa melayu", + "{}, Jemahan di bahasa melayu", + "{}, jemahkan ayat ini kepada bahasa melayu", ], "en": ["{}. translate to english", "{} write in english", "english translation: '{}'"], "ru": ["помогите мне перевести это на русский : {}", "{} перевести на русский язык", "russian translation: '{}'"], @@ -71,6 +72,8 @@ TRANSLATION_PROMPT = { "nl": ["{}. translate to dutch", "{} write in dutch", "dutch translation: '{}'"], "vi": ["{}. Dịch sang tiếng việt nam", "{} write in vietnamese", "vietnamese translation: '{}'"], "ar": ["{}. translate to arabic", "{} write in arabic", "arabic translation: '{}'"], + "es": ["{}. translate to spanish", "{} write in spanish", "spanish translation: '{}'"], + "hi": ["{}. translate to hindi", "{} write in hindi", "hindi translation: '{}'"], } @@ -114,8 +117,6 @@ class WMT2019(TranslationPair): else: # translating in reverse direction source = random.choice(TRANSLATION_PROMPT[src]).format(row[tgt]) self.pairs.append((source, row[src])) - if len(self.pairs) > 100000: - break class DiveMT(TranslationPair):