Update README.md: change Mistral-Instruct DPO learning rate from 2e-7 to 5e-7

2026-06-27 20:19:50 +08:00 · 2024-07-09 16:40:57 -04:00
parent 68cc8bc75a
commit 65a7ac97d5
1 changed file with 1 addition and 1 deletion
@@ -42,7 +42,7 @@ For DPO, we use the following hyperparameters for training.
 | Setting                  | β | Learning Rate |
 |------------------------|------|---------------|
 | Mistral-Base           | 0.01 | 5e-7      |
-| Mistral-Instruct       | 0.01 | 2e-7      |
+| Mistral-Instruct       | 0.01 | 5e-7      |
 | Llama3-Base            | 0.01 | 5e-7      |
 | Llama3-Instruct        | 0.01 | 7e-7      |
 | Llama3-Instruct v0.2   | 0.01 | 3e-7      |