From e4c04daadf48944dff4626e11e92e0e553fe3ff7 Mon Sep 17 00:00:00 2001
From: deep1 <uzair@threespringscapital.com>
Date: Sat, 22 Apr 2023 18:42:01 +0800
Subject: [PATCH] git clone script working

---
 README.md                     | 14 ++++++++------
 loras/.gitkeep                |  0
 matmul_utils_4bit.py          |  3 +++
 mjc_notes.md                  |  8 ++++----
 requirements/requirements.txt |  3 +++
 scripts/clone-model.py        | 33 +++++++++++++++++----------------
 6 files changed, 35 insertions(+), 26 deletions(-)
 delete mode 100644 loras/.gitkeep

diff --git a/README.md b/README.md
index 60e0b15..d6300f8 100644
--- a/README.md
+++ b/README.md
@@ -35,21 +35,23 @@ pip install -e .
 # download models
 
 ```sh
-# # base models.... FIXME
+huggingface-cli login
 
+# download base models
+python scripts/download-model.py decapoda-research/llama-7b-hf
+# python scripts/download-model.py decapoda-research/llama-13b-hf
+# python scripts/download-model.py decapoda-research/llama-30b-hf
 
 # download loras
-python scripts/download-model.py chansung/alpaca-lora-30b
-python scripts/download-model.py chansung/alpaca-lora-13b
 python scripts/download-model.py tloen/alpaca-lora-7b
+# python scripts/download-model.py chansung/alpaca-lora-13b
+# python scripts/download-model.py chansung/alpaca-lora-30b
 ```
 
 # convert models
 
 ```sh
-# download
-python scripts/download-model.py tloen/alpaca-lora-7b
-python scripts/download-model.py decapoda-research/llama-7b-hf
+
 # convert
 python scripts/export_hf_checkpoint.py ./data/models/llama-7b-hf -l ./data/loras/tloen_alpaca-lora-7b
 # test
diff --git a/loras/.gitkeep b/loras/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/matmul_utils_4bit.py b/matmul_utils_4bit.py
index b995dfb..5ceb5e5 100644
--- a/matmul_utils_4bit.py
+++ b/matmul_utils_4bit.py
@@ -1,3 +1,6 @@
+"""
+from https://raw.githubusercontent.com/johnsmith0031/alpaca_lora_4bit/
+"""
 import torch
 import numpy as np
 from gptq_llama import quant_cuda
diff --git a/mjc_notes.md b/mjc_notes.md
index 8696151..673bd80 100644
--- a/mjc_notes.md
+++ b/mjc_notes.md
@@ -33,10 +33,10 @@ pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
 
 ```sh
 # # base models.... FIXME
-python scripts/download-model.py decapoda-research/llama-7b-hf
-python scripts/download-model.py decapoda-research/llama-13b-hf
-python scripts/download-model.py decapoda-research/llama-30b-hf
-python scripts/download-model.py decapoda-research/llama-65b-hf
+python scripts/clone-model.py decapoda-research/llama-7b-hf
+python scripts/clone-model.py decapoda-research/llama-13b-hf
+python scripts/clone-model.py decapoda-research/llama-30b-hf
+python scripts/clone-model.py decapoda-research/llama-65b-hf
 # oh! you need to replace LLaMATokenizer with LlamaTokenizer in all model json files
 
 # download loras
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index f7b3587..5f19aae 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -11,3 +11,6 @@ git+https://github.com/huggingface/transformers.git@656e869
 # git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit
 git+https://github.com/wassname/peft.git
 -e .
+# misc
+gitpython
+black
diff --git a/scripts/clone-model.py b/scripts/clone-model.py
index 8980490..190b97a 100644
--- a/scripts/clone-model.py
+++ b/scripts/clone-model.py
@@ -4,34 +4,35 @@ clones models from Hugging Face to models/model-name.
 Example:
 python clone-model.py facebook/opt-1.3b
 
+This seems nicer than the previous download script: https://github.com/oobabooga/text-generation-webui/blob/main/download-model.py
+
 '''
 
 from git import Repo
 import argparse
-from tqdm.auto import tqdm
-from git import RemoteProgress
-
+import os
+import subprocess
 
 parser = argparse.ArgumentParser()
 parser.add_argument('MODEL', type=str, default=None, help="`tloen/alpaca-lora-7b`")
-parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.')
+parser.add_argument('-b', '--branch', type=str, default='main', help='Name of the Git branch to download from.')
+parser.add_argument('-t', '--text_only', action='store_true', help='Skip Git LFS downloads (sets GIT_LFS_SKIP_SMUDGE=1).')
 args = parser.parse_args()
 
-class CloneProgress(RemoteProgress):
-    """tqdm progress bar for GitPython"""
-    def __init__(self):
-        super().__init__()
-        self.pbar = tqdm()
-
-    def update(self, op_code, cur_count, max_count=None, message=''):
-        self.pbar.total = max_count
-        self.pbar.n = cur_count
-        self.pbar.refresh()
 
 if __name__ == '__main__':
     model = args.MODEL
     repo = 'https://huggingface.co/' + model
     name = model.replace('/', '_')
-    dest = f'./models/{name}'
+    output_folder = './data/loras' if 'lora' in name else './data/models'
+    dest = f'{output_folder}/{name}_git'
+    if args.text_only:
+        # Tell git-lfs to skip fetching LFS objects during checkout.
+        os.environ['GIT_LFS_SKIP_SMUDGE'] = "1"
+
+    result = subprocess.run(['git', 'lfs'], capture_output=True)
+    if result.returncode != 0:  # explicit check: `assert` is stripped under `python -O`
+        raise SystemExit('git lfs should be installed')
+    print(f'cloning "{repo}" to "{dest}"')
+    # Forward --branch to `git clone`; it was parsed but never used before.
-    Repo.clone_from(repo, dest, progress=CloneProgress())
+    Repo.clone_from(repo, dest, branch=args.branch, multi_options=['--depth=1'])