mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-28 16:20:34 +08:00
[fix] Fix missing russian and update readme
This commit is contained in:
@@ -60,6 +60,23 @@ python trainer.py --configs defaults your-model-name --deepspeed
|
||||
|
||||
## Dataset choices
|
||||
|
||||
To choose the translation pair for the [WMT](https://huggingface.co/datasets/wmt19) and [TED Talk](https://huggingface.co/datasets/ted_talks_iwslt) translation datasets, simply append a supported language pair as a suffix to the dataset name:
|
||||
|
||||
```
|
||||
datasets:
|
||||
- wmt2019_zh-en
|
||||
- wmt2019_ru-en
|
||||
- wmt2019_de-en
|
||||
- ted_trans_nl-en
|
||||
- ted_trans_de-ja
|
||||
```
|
||||
|
||||
Currently only these languages are supported via prompt translation:
|
||||
|
||||
```
|
||||
ar,de,fr,en,it,nl,tr,ru,ms,ko,ja,zh
|
||||
```
|
||||
|
||||
## Results
|
||||
|
||||
Experimental results in wandb
|
||||
|
||||
@@ -63,10 +63,11 @@ TRANSLATION_PROMPT = {
|
||||
"{}, jemahkan ayat ini kepada bahasa melayu"
|
||||
],
|
||||
"en": ["{}. translate to english", "{} write in english", "english translation: '{}'"],
|
||||
"tr": ["{}. translate to turkish", "{} write in turkish", "turkish translation: '{}'"],
|
||||
"ru": ["помогите мне перевести это на русский : {}", "{} перевести на русский язык", "russian translation: '{}'"],
|
||||
"tr": ["{}. türkçeye çevi̇ri̇n", "{} write in turkish", "turkish translation: '{}'", "türkçeye çevi̇rmek: {}"],
|
||||
"it": ["{}. translate to italian", "{} write in italian", "italian translation: '{}'"],
|
||||
"nl": ["{}. translate to dutch", "{} write in dutch", "dutch translation: '{}'"],
|
||||
"vi": ["{}. translate to vietnamese", "{} write in vietnamese", "vietnamese translation: '{}'"],
|
||||
"vi": ["{}. Dịch sang tiếng việt nam", "{} write in vietnamese", "vietnamese translation: '{}'"],
|
||||
"ar": ["{}. translate to arabic", "{} write in arabic", "arabic translation: '{}'"],
|
||||
}
|
||||
class TranslationPair(Dataset):
|
||||
|
||||
Reference in New Issue
Block a user