Add auto_insert_empty_system_msg config flag (#123)

* Make system messages optional

Also use `maybe_insert_system_message` in the DPO setting

* add `auto_insert_empty_system_msg` flag

* add `auto_insert_empty_system_msg`

* add auto_insert_empty_system_msg

* Update src/alignment/configs.py

Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>

* make style

---------

Co-authored-by: lewtun <lewis.c.tunstall@gmail.com>
This commit is contained in:
Bram Vanroy
2024-02-28 20:05:44 +01:00
committed by GitHub
parent 87cc800498
commit d17fd7cd3b
4 changed files with 27 additions and 7 deletions
+5 -1
View File
@@ -93,7 +93,11 @@ def main():
#####################
raw_datasets = raw_datasets.map(
apply_chat_template,
fn_kwargs={"tokenizer": tokenizer, "task": "dpo"},
fn_kwargs={
"tokenizer": tokenizer,
"task": "dpo",
"auto_insert_empty_system_msg": data_args.auto_insert_empty_system_msg,
},
num_proc=data_args.preprocessing_num_workers,
remove_columns=column_names,
desc="Formatting comparisons with prompt template",
+5 -1
View File
@@ -100,7 +100,11 @@ def main():
#####################
raw_datasets = raw_datasets.map(
apply_chat_template,
fn_kwargs={"tokenizer": tokenizer, "task": "sft"},
fn_kwargs={
"tokenizer": tokenizer,
"task": "sft",
"auto_insert_empty_system_msg": data_args.auto_insert_empty_system_msg,
},
num_proc=data_args.preprocessing_num_workers,
remove_columns=column_names,
desc="Applying chat template",