From e4c04daadf48944dff4626e11e92e0e553fe3ff7 Mon Sep 17 00:00:00 2001 From: deep1 Date: Sat, 22 Apr 2023 18:42:01 +0800 Subject: [PATCH] git clone script working --- README.md | 14 ++++++++------ loras/.gitkeep | 0 matmul_utils_4bit.py | 3 +++ mjc_notes.md | 8 ++++---- requirements/requirements.txt | 3 +++ scripts/clone-model.py | 33 +++++++++++++++++---------------- 6 files changed, 35 insertions(+), 26 deletions(-) delete mode 100644 loras/.gitkeep diff --git a/README.md b/README.md index 60e0b15..d6300f8 100644 --- a/README.md +++ b/README.md @@ -35,21 +35,23 @@ pip install -e . # download models ```sh -# # base models.... FIXME +huggingface-cli login +# download base models +python scripts/download-model.py decapoda-research/llama-7b-hf +# python scripts/download-model.py decapoda-research/llama-13b-hf +# python scripts/download-model.py decapoda-research/llama-30b-hf # download loras -python scripts/download-model.py chansung/alpaca-lora-30b -python scripts/download-model.py chansung/alpaca-lora-13b python scripts/download-model.py tloen/alpaca-lora-7b +# python scripts/download-model.py chansung/alpaca-lora-13b +# python scripts/download-model.py chansung/alpaca-lora-30b ``` # convert models ```sh -# download -python scripts/download-model.py tloen/alpaca-lora-7b -python scripts/download-model.py decapoda-research/llama-7b-hf + # convert python scripts/export_hf_checkpoint.py ./data/models/llama-7b-hf -l ./data/loras/tloen_alpaca-lora-7b # test diff --git a/loras/.gitkeep b/loras/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/matmul_utils_4bit.py b/matmul_utils_4bit.py index b995dfb..5ceb5e5 100644 --- a/matmul_utils_4bit.py +++ b/matmul_utils_4bit.py @@ -1,3 +1,6 @@ +""" +from https://raw.githubusercontent.com/johnsmith0031/alpaca_lora_4bit/ +""" import torch import numpy as np from gptq_llama import quant_cuda diff --git a/mjc_notes.md b/mjc_notes.md index 8696151..673bd80 100644 --- a/mjc_notes.md +++ b/mjc_notes.md @@ -33,10 +33,10 @@ pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit ```sh # # base models.... FIXME -python scripts/download-model.py decapoda-research/llama-7b-hf -python scripts/download-model.py decapoda-research/llama-13b-hf -python scripts/download-model.py decapoda-research/llama-30b-hf -python scripts/download-model.py decapoda-research/llama-65b-hf +python scripts/clone-model.py decapoda-research/llama-7b-hf +python scripts/clone-model.py decapoda-research/llama-13b-hf +python scripts/clone-model.py decapoda-research/llama-30b-hf +python scripts/clone-model.py decapoda-research/llama-65b-hf # oh! you need to replace LLaMATokenizer with LlamaTokenizer in all model json files # download loras diff --git a/requirements/requirements.txt b/requirements/requirements.txt index f7b3587..5f19aae 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -11,3 +11,6 @@ git+https://github.com/huggingface/transformers.git@656e869 # git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit git+https://github.com/wassname/peft.git -e . +# misc +gitpython +black diff --git a/scripts/clone-model.py b/scripts/clone-model.py index 8980490..190b97a 100644 --- a/scripts/clone-model.py +++ b/scripts/clone-model.py @@ -4,34 +4,35 @@ clones models from Hugging Face to models/model-name. Example: python clone-model.py facebook/opt-1.3b +this seems nicer than the previous script https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py + ''' from git import Repo import argparse -from tqdm.auto import tqdm -from git import RemoteProgress - +import os +import subprocess parser = argparse.ArgumentParser() parser.add_argument('MODEL', type=str, default=None, help="`tloen/alpaca-lora-7b`") -parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.') +parser.add_argument('-b', '--branch', type=str, default='main', help='Name of the Git branch to download from.') +parser.add_argument('-t', '--text_only', action='store_true') args = parser.parse_args() -class CloneProgress(RemoteProgress): - """tqdm progress bar for GitPython""" - def __init__(self): - super().__init__() - self.pbar = tqdm() - - def update(self, op_code, cur_count, max_count=None, message=''): - self.pbar.total = max_count - self.pbar.n = cur_count - self.pbar.refresh() if __name__ == '__main__': model = args.MODEL repo = 'https://huggingface.co/' + model name = model.replace('/', '_') - dest = f'./models/{name}' + output_folder = './data/loras' if 'lora' in name else './data/models' + dest = f'{output_folder}/{name}_git' + + if args.text_only: + os.environ['GIT_LFS_SKIP_SMUDGE']="1" + + + result = subprocess.run(['git', 'lfs'], capture_output=True) + assert result.returncode==0, 'git lfs should be installed' + print(f'cloning "{repo}" to "{dest}"') - Repo.clone_from(repo, dest, progress=CloneProgress()) + Repo.clone_from(repo, dest, multi_options=['--depth=1'])