This commit is contained in:
wassname
2025-06-02 05:04:46 +00:00
parent dda269f58a
commit 6b882373bb
3 changed files with 3054 additions and 26 deletions
+19
View File
@@ -1,3 +1,22 @@
In this fork I SFT (supervised fine-tune) some base models so that they are suitable for testing alternatives to DPO. It's for my project: https://github.com/wassname/repr-preference-optimization
I train:
- [ ] HuggingFaceTB/SmolLM2-135M
- [ ] HuggingFaceTB/SmolLM2-360M
- [ ] HuggingFaceTB/SmolLM2-1.7B
- [ ] Qwen/Qwen3-0.6B-Base
- [ ] Qwen/Qwen3-4B-Base
- [x] https://huggingface.co/wassname/llama-3-2-1b-sft
Other people's base->SFT models:
- allenai/OLMo-2-0425-1B-SFT
- allenai/OLMoE-1B-7B-0924-SFT
- https://huggingface.co/allenai/OLMo-2-0425-1B-SFT
- https://huggingface.co/allenai/OLMo-7B-SFT-hf
- https://huggingface.co/princeton-nlp/Llama-3-Base-8B-SFT
Original readme
----
# Simple Preference Optimization (SimPO)
This repository contains the code and released models for our paper [SimPO: Simple Preference Optimization with a Reference-Free Reward](https://arxiv.org/abs/2405.14734). We propose a simpler and more effective preference optimization algorithm than DPO (Direct Preference Optimization) without using a reference model. SimPO outperforms DPO and its latest variants across AlpacaEval 2, MT-Bench, and Arena-Hard benchmarks under various settings.
+22 -26
View File
@@ -1,34 +1,30 @@
[tool.poetry]
# Poetry-only switch; it is not a valid key of the standard [project] table,
# so it lives under Poetry's own namespace.
package-mode = false

[project]
name = "simpo"
version = "0.1.0"
description = ""
# PEP 621 requires each author to be a table with `name` / `email` keys;
# the "Name <email>" string form is Poetry-specific.
authors = [
    { name = "wassname", email = "1103714+wassname@users.noreply.github.com" },
]
readme = "README.md"
requires-python = ">=3.10.9,<4.0"
dependencies = [
    # Git-based dependency; its source is declared under [tool.uv.sources].
    "alignment-handbook",
    "deepspeed>=0.14",
    "torch>=2.1.2",
    "wandb>=0.18.2",
]
# Development-only dependency group (not part of the runtime dependencies).
[dependency-groups]
dev = ["ipykernel>=6.29.5", "pytest"]
[build-system]
# A table may define `requires` / `build-backend` only once (duplicate keys
# are invalid TOML). hatchling is the declared backend; its wheel target is
# configured under [tool.hatch.build.targets.wheel].
requires = ["hatchling"]
build-backend = "hatchling.build"
# Extra package index for PyTorch wheels (the URL path suggests CUDA 12.1
# builds — TODO confirm). Referenced by name from the `torch` entry in
# [tool.poetry.dependencies].
[[tool.poetry.source]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cu121"
# NOTE(review): per Poetry's docs an "explicit" source is presumably consulted
# only for packages that name it via `source = ...` — confirm.
priority = "explicit"
# Hatch wheel target: lists the package directory to include in the wheel.
[tool.hatch.build.targets.wheel]
packages = ["simpo"]
[tool.poetry.dependencies]
python = "^3.10.9,<4.0"
# PEP 440 does not allow a local version label ("+cu121") with `>=`.
# The wheel variant comes from the "pytorch" source (a cu121 index URL).
torch = { version = ">=2.1.2", source = "pytorch" }
alignment-handbook = { git = "https://github.com/huggingface/alignment-handbook.git" }
deepspeed = "^0.14"
wandb = "^0.18.2"
# Currently disabled pins:
# huggingface-hub = "~0.23.0"
# trl = "~0.9.0"
# transformers = "~4.39.3"
# Not managed here; manual install command (TODO confirm it is still needed):
#   python -m pip install flash-attn --no-build-isolation
# Poetry `dev` dependency group: test runner and Jupyter kernel.
[tool.poetry.group.dev.dependencies]
# "*" accepts any pytest version.
pytest = "*"
ipykernel = "^6.29.5"
# NOTE(review): `virtualenvs.in-project` looks like a Poetry *configuration*
# setting (normally kept in poetry.toml or Poetry's global config), and
# `virtualenvs` is not a standard top-level pyproject.toml table. This table
# presumably has no effect here — confirm and move it to poetry.toml.
[virtualenvs]
in-project = true
# uv source override: resolve the `alignment-handbook` requirement listed in
# [project].dependencies from this git repository.
[tool.uv.sources]
alignment-handbook = { git = "https://github.com/huggingface/alignment-handbook.git" }
Generated
+3013
View File
File diff suppressed because it is too large Load Diff