[fix] Fix missing Russian and update README

This commit is contained in:
theblackcat102
2023-01-20 06:16:26 +00:00
parent 74cb9aaa5a
commit 6cd62e3d48
2 changed files with 20 additions and 2 deletions
+17
View File
@@ -60,6 +60,23 @@ python trainer.py --configs defaults your-model-name --deepspeed
## Dataset choices
To choose the translation pair for the [WMT](https://huggingface.co/datasets/wmt19) and [TED Talk](https://huggingface.co/datasets/ted_talks_iwslt) translation datasets, append the supported language pair as a suffix to the dataset name:
```
datasets:
- wmt2019_zh-en
- wmt2019_ru-en
- wmt2019_de-en
- ted_trans_nl-en
- ted_trans_de-ja
```
Currently only these languages are supported via prompt translation:
```
ar,de,fr,en,it,nl,tr,ru,ms,ko,ja,zh
```
## Results
Experimental results in wandb
@@ -63,10 +63,11 @@ TRANSLATION_PROMPT = {
"{}, jemahkan ayat ini kepada bahasa melayu"
],
"en": ["{}. translate to english", "{} write in english", "english translation: '{}'"],
"tr": ["{}. translate to turkish", "{} write in turkish", "turkish translation: '{}'"],
"ru": ["помогите мне перевести это на русский : {}", "{} перевести на русский язык", "russian translation: '{}'"],
"tr": ["{}. türkçeye çevi̇ri̇n", "{} write in turkish", "turkish translation: '{}'", "türkçeye çevi̇rmek: {}"],
"it": ["{}. translate to italian", "{} write in italian", "italian translation: '{}'"],
"nl": ["{}. translate to dutch", "{} write in dutch", "dutch translation: '{}'"],
"vi": ["{}. translate to vietnamese", "{} write in vietnamese", "vietnamese translation: '{}'"],
"vi": ["{}. Dịch sang tiếng việt nam", "{} write in vietnamese", "vietnamese translation: '{}'"],
"ar": ["{}. translate to arabic", "{} write in arabic", "arabic translation: '{}'"],
}
class TranslationPair(Dataset):