diff --git a/README.md b/README.md index ee0c6a5..7b24f2e 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ However, we know from the [InstructGPT](https://huggingface.co/papers/2203.02155 The Alignment Handbook aims to fill that gap by providing the community with a series of robust training recipes that span the whole pipeline. ## News 🗞️ - +* **January 18, 2024:** We release a suite of evaluations of DPO vs KTO vs IPO; see the [recipe](recipes/pref_align_scan/README.md) and the [blogpost](https://huggingface.co/blog/pref-tuning). * **November 10, 2023:** We release all the training code to replicate Zephyr-7b-β 🪁! We also release [No Robots](https://huggingface.co/datasets/HuggingFaceH4/no_robots), a brand new dataset of 10,000 instructions and demonstrations written entirely by skilled human annotators. ## Links 🔗 diff --git a/recipes/pref_align_scan/README.md b/recipes/pref_align_scan/README.md new file mode 100644 index 0000000..767a742 --- /dev/null +++ b/recipes/pref_align_scan/README.md @@ -0,0 +1,48 @@ +# Comparing Preference Alignment Algorithms +This directory contains various comparisons for three algorithms: DPO, IPO, and KTO. Each algorithm has been run in different hyperparameter configurations to study their performance. Two different models and datasets have been used to compare the performance of each algorithm: + +- zephyr-beta-sft and Ultrafeedback +- OpenHermes-2.5 and the OpenOrca datasets + +We release a collection containing the datasets and models used for these experiments. If you require the other trained models, we can release them on request. +You can find a longer description of these results in our [blogpost](https://huggingface.co/blog/pref-tuning). +## Comparisons +For each algorithm, we aim to tune the beta parameter for a fixed learning rate. We vary beta from 0.1 to 0.9 in steps of 0.1. We have also found that in certain configurations a tiny value of beta, 0.01, can be effective. 
So we have included this smaller value in all our comparisons. + +## Usage +The experiments can be launched with the following bash script: +``` +#!/bin/bash + +# Define an array containing the base configs we wish to fine tune +configs=("zephyr" "openhermes") +# Define an array of loss types +loss_types=("sigmoid" "kto_pair" "ipo") + +# Define an array of beta values +betas=("0.01" "0.1" "0.2" "0.3" "0.4" "0.5" "0.6" "0.7" "0.8" "0.9") + +# Outer loop over base configs, middle loop over loss types +for config in "${configs[@]}"; do + for loss_type in "${loss_types[@]}"; do + + # Inner loop for beta values + for beta in "${betas[@]}"; do + + # Build the job name and model revision; ${config} must be braced, otherwise bash expands the unset variable config_ + job_name="${config}_${loss_type}_beta_${beta}" + model_revision="${loss_type}-${beta}" + + # Submit the job; the final quoted string is passed to launch.slurm as a single argument holding the extra training flags + sbatch --job-name="${job_name}" recipes/launch.slurm pref_align_scan dpo "${config}" deepspeed_zero3 \ + "--beta=${beta} --loss_type=${loss_type} --output_dir=data/${config}-7b-align-scan-${loss_type}-beta-${beta} --hub_model_revision=${model_revision}" + done + done +done +``` + + + + + + diff --git a/recipes/pref_align_scan/dpo/config_openhermes.yaml b/recipes/pref_align_scan/dpo/config_openhermes.yaml new file mode 100644 index 0000000..93d9ef3 --- /dev/null +++ b/recipes/pref_align_scan/dpo/config_openhermes.yaml @@ -0,0 +1,41 @@ +# Model arguments +model_name_or_path: teknium/OpenHermes-2.5-Mistral-7B +torch_dtype: null + +# Data training arguments +dataset_mixer: + HuggingFaceH4/orca_dpo_pairs: 1.0 +dataset_splits: +- train_prefs +- test_prefs +preprocessing_num_workers: 12 + +# Training arguments with sensible defaults +bf16: true +beta: 0.01 +loss_type: sigmoid +do_eval: true +do_train: true +evaluation_strategy: steps +eval_steps: 100 +gradient_accumulation_steps: 2 +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: False +hub_model_id: HuggingFaceH4/openhermes-2.5-mistral-7b-dpo +hub_model_revision: v1.0 + +learning_rate: 5.0e-7 +logging_steps: 10 
+lr_scheduler_type: cosine +max_prompt_length: 512 +num_train_epochs: 1 +optim: adamw_torch +output_dir: data/openhermes-2.5-mistral-7b-dpo-v1.0 +per_device_train_batch_size: 8 +per_device_eval_batch_size: 8 +save_strategy: "steps" +save_steps: 100 +save_total_limit: 1 +seed: 42 +warmup_ratio: 0.1 \ No newline at end of file diff --git a/recipes/pref_align_scan/dpo/config_zephyr.yaml b/recipes/pref_align_scan/dpo/config_zephyr.yaml new file mode 100644 index 0000000..01899bd --- /dev/null +++ b/recipes/pref_align_scan/dpo/config_zephyr.yaml @@ -0,0 +1,39 @@ +# Model arguments +model_name_or_path: alignment-handbook/zephyr-7b-sft-full +torch_dtype: null + +# Data training arguments +dataset_mixer: + HuggingFaceH4/ultrafeedback_binarized: 1.0 +dataset_splits: +- train_prefs +- test_prefs +preprocessing_num_workers: 12 + +# Training arguments with sensible defaults +bf16: true +beta: 0.01 +loss_type: sigmoid +do_eval: true +evaluation_strategy: steps +eval_steps: 100 +gradient_accumulation_steps: 2 +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: False +hub_model_id: zephyr-7b-align-scan +hub_model_revision: dpo-beta-0.01 +learning_rate: 5.0e-7 +logging_steps: 10 +lr_scheduler_type: cosine +max_prompt_length: 512 +num_train_epochs: 1 +optim: adamw_torch +output_dir: data/zephyr-7b-align-scan-dpo-beta-0.01 +per_device_train_batch_size: 8 +per_device_eval_batch_size: 8 +save_strategy: "steps" +save_steps: 100 +save_total_limit: 1 +seed: 42 +warmup_ratio: 0.1 \ No newline at end of file diff --git a/recipes/pref_align_scan/launch_scan.sh b/recipes/pref_align_scan/launch_scan.sh new file mode 100644 index 0000000..334b947 --- /dev/null +++ b/recipes/pref_align_scan/launch_scan.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Define an array containing the base configs we wish to fine tune +configs=("zephyr" "openhermes") +# Define an array of loss types +loss_types=("sigmoid" "kto_pair" "ipo") +# Define an array of beta values +betas=("0.01" "0.1" "0.2" 
"0.3" "0.4" "0.5" "0.6" "0.7" "0.8" "0.9") + +# Outer loop over base configs, middle loop over loss types +for config in "${configs[@]}"; do + for loss_type in "${loss_types[@]}"; do + + # Inner loop for beta values + for beta in "${betas[@]}"; do + # Build the job name and model revision; ${config} must be braced, otherwise bash expands the unset variable config_ + job_name="${config}_${loss_type}_beta_${beta}" + model_revision="${loss_type}-${beta}" + + # Submit the job; the final quoted string is passed to launch.slurm as a single argument holding the extra training flags + sbatch --job-name="${job_name}" recipes/launch.slurm pref_align_scan dpo "${config}" deepspeed_zero3 \ + "--beta=${beta} --loss_type=${loss_type} --output_dir=data/${config}-7b-align-scan-${loss_type}-beta-${beta} --hub_model_revision=${model_revision}" + done + done +done \ No newline at end of file diff --git a/setup.py b/setup.py index 7e2b7f8..e5ed532 100644 --- a/setup.py +++ b/setup.py @@ -61,6 +61,7 @@ _deps = [ "protobuf<=3.20.2", # Needed to avoid conflicts with `transformers` "pytest", "safetensors>=0.3.3", + "sentencepiece>=0.1.99", "scipy", "tensorboard", "torch==2.1.2", @@ -98,6 +99,7 @@ install_requires = [ deps["evaluate"], deps["datasets"], deps["deepspeed"], + deps["hf_transfer"], deps["huggingface-hub"], deps["jinja2"], deps["ninja"], @@ -106,6 +108,7 @@ install_requires = [ deps["peft"], deps["protobuf"], deps["safetensors"], + deps["sentencepiece"], deps["scipy"], deps["tensorboard"], deps["tqdm"], # progress bars in model download and training scripts @@ -115,7 +118,7 @@ install_requires = [ setup( name="alignment-handbook", - version="0.3.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) + version="0.4.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots) author="The Hugging Face team (past and future)", author_email="lewis@huggingface.co", description="The Alignment Handbook",