diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
new file mode 100644
index 0000000..4b4e012
--- /dev/null
+++ b/.github/workflows/quality.yml
@@ -0,0 +1,32 @@
+# Runs `make quality` on pushes to main / release branches and on pull requests against main.
+name: Quality
+
+on:
+  push:
+    branches:
+      - main
+      - v*-release
+  pull_request:
+    branches:
+      - main
+
+jobs:
+
+  check_code_quality:
+    name: Check code quality
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Setup Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10.10"  # quoted so YAML always reads a version as a string (unquoted 3.10 would become 3.1)
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install ".[quality]"
+      - name: Code quality
+        run: |
+          make quality
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4610f7a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,47 @@
+# Every target below is a command, not a file: declare them all phony so a same-named path never masks one.
+.PHONY: style quality pre-release pre-patch post-release post-patch wheels wheels_clean pypi_upload pypi_test_upload
+
+# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes!)
+export PYTHONPATH = src
+
+check_dirs := src tests scripts templates
+
+# Reformat the sources in place with black and isort.
+style:
+	python -m black --line-length 119 --target-version py310 $(check_dirs) setup.py
+	python -m isort $(check_dirs) setup.py
+
+# Check formatting, import order and lint rules without modifying any file.
+quality:
+	python -m black --check --line-length 119 --target-version py310 $(check_dirs) setup.py
+	python -m isort --check-only $(check_dirs) setup.py
+	python -m flake8 --max-line-length 119 $(check_dirs) setup.py
+
+
+# Release stuff
+
+pre-release:
+	python src/alignment/utils/release.py
+
+pre-patch:
+	python src/alignment/utils/release.py --patch
+
+post-release:
+	python src/alignment/utils/release.py --post_release
+
+post-patch:
+	python src/alignment/utils/release.py --post_release --patch
+
+wheels:
+	python setup.py bdist_wheel && python setup.py sdist
+
+wheels_clean:
+	rm -rf build && rm -rf dist
+
+pypi_upload:
+	python -m pip install twine
+	twine upload dist/* -r pypi
+
+pypi_test_upload:
+	python -m pip install twine
+	twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
diff --git 
a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..c2ee645
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,43 @@
+[isort]
+# Modules isort cannot classify fall back to this section, so any third-party package that is
+# missing from `known_third_party` below gets sorted together with the first-party imports.
+default_section = FIRSTPARTY
+ensure_newline_before_comments = True
+force_grid_wrap = 0
+include_trailing_comma = True
+known_first_party = alignment
+known_third_party =
+    transformers
+    datasets
+    fugashi
+    git
+    h5py
+    matplotlib
+    nltk
+    numpy
+    packaging
+    pandas
+    psutil
+    pytest
+    rouge_score
+    sacrebleu
+    seqeval
+    sklearn
+    streamlit
+    torch
+    tqdm
+
+line_length = 119
+lines_after_imports = 2
+multi_line_output = 3
+use_parentheses = True
+
+[flake8]
+ignore = E203, E501, E741, W503, W605
+max-line-length = 119
+per-file-ignores =
+    # imported but unused
+    __init__.py: F401
+
+[tool:pytest]
+doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0bc1cbd
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,136 @@
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Adapted from huggingface/transformers: https://github.com/huggingface/transformers/blob/21a2d900eceeded7be9edc445b56877b95eda4ca/setup.py
+
+
+import re
+import shutil
+from pathlib import Path
+
+from setuptools import find_packages, setup
+
+
+# Remove stale alignment.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
+stale_egg_info = Path(__file__).parent / "alignment.egg-info"
+if stale_egg_info.exists():
+    print(
+        (
+            "Warning: {} exists.\n\n"
+            "If you recently updated alignment, this is expected,\n"
+            "but it may prevent alignment from installing in editable mode.\n\n"
+            "This directory is automatically generated by Python's packaging tools.\n"
+            "I will remove it now.\n\n"
+            "See https://github.com/pypa/pip/issues/5466 for details.\n"
+        ).format(stale_egg_info)
+    )
+    shutil.rmtree(stale_egg_info)
+
+
+# IMPORTANT: all dependencies should be listed here with their version requirements, if any.
+#   * If a dependency is fast-moving (e.g. transformers), pin to the exact version
+_deps = [
+    "accelerate==0.23.0",
+    "bitsandbytes==0.41.1",
+    "black==23.1.0",
+    "datasets==2.12.0",
+    "deepspeed==0.9.5",
+    "einops==0.6.1",
+    "evaluate==0.4.0",
+    "flake8>=6.0.0",
+    "huggingface-hub>=0.14.1,<1.0",
+    "isort>=5.12.0",
+    "ninja==1.11.1",
+    "numpy>=1.24.2",
+    "packaging>=23.0",
+    "parameterized>=0.9.0",
+    "peft==0.5.0",
+    "protobuf<=3.20.2",  # Needed to avoid conflicts with `transformers`
+    "pytest",
+    "safetensors==0.3.3",
+    "tensorboard",
+    "torch==2.0.1",
+    "transformers @ git+https://github.com/huggingface/transformers.git@b3961f7291307ee877ef1a4d057949597d805220",
+    "trl @ git+https://github.com/huggingface/trl.git@1e56ff0f166888973d69cd9d56be60a9f8edfedb",  # TODO bump to next release, added for NEFTune
+    "tqdm>=4.64.1",
+]
+
+# `deps` maps each bare package name (the regex drops any [extras] and the version / URL specifier)
+# to its full requirement string from `_deps`, with items like:
+#   torch: "torch==2.0.1"
+#   pytest: "pytest"
+# some of the values are versioned whereas others aren't.
+deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ \[\]]+)(?:\[[^\]]+\])?(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}
+
+
+def deps_list(*pkgs):
+    """Return the requirement strings from `deps` for the given package names, in call order."""
+    return [deps[pkg] for pkg in pkgs]
+
+
+extras = {}
+extras["tests"] = deps_list("pytest", "parameterized")
+extras["torch"] = deps_list("torch")
+extras["quality"] = deps_list("black", "isort", "flake8")
+extras["dev"] = extras["quality"] + extras["tests"]
+
+# core dependencies shared across the whole project - keep this to a bare minimum :)
+install_requires = [
+    deps["accelerate"],
+    deps["bitsandbytes"],
+    deps["einops"],
+    deps["evaluate"],
+    deps["datasets"],
+    deps["deepspeed"],
+    deps["huggingface-hub"],
+    deps["ninja"],
+    deps["numpy"],
+    deps["packaging"],  # utilities from PyPA to e.g., compare versions
+    deps["peft"],
+    deps["protobuf"],
+    deps["safetensors"],
+    deps["tensorboard"],
+    deps["tqdm"],  # progress bars in model download and training scripts
+    deps["transformers"],
+]  # NOTE(review): "trl" is pinned in _deps but is not in install_requires or any extra - confirm this is intended
+
+setup(
+    name="alignment",
+    version="0.2.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    author="The Hugging Face team (past and future)",
+    author_email="lewis@huggingface.co",
+    description="The Alignment Handbook",
+    long_description=(Path(__file__).parent / "README.md").read_text(encoding="utf-8"),  # read_text closes the file
+    long_description_content_type="text/markdown",
+    keywords="nlp deep learning rlhf llm",
+    license="Apache",
+    url="https://github.com/huggingface/alignment",
+    package_dir={"": "src"},
+    packages=find_packages("src"),
+    zip_safe=False,
+    extras_require=extras,
+    python_requires=">=3.10.9",
+    install_requires=install_requires,
+    classifiers=[
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.10",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
+)