mirror of
https://github.com/wassname/alpaca_convert.git
synced 2026-06-27 17:47:04 +08:00
misc
This commit is contained in:
+5
-4
@@ -55,10 +55,10 @@ python scripts/download-model.py tloen/alpaca-lora-7b
|
|||||||
python scripts/download-model.py decapoda-research/llama-7b-hf
|
python scripts/download-model.py decapoda-research/llama-7b-hf
|
||||||
|
|
||||||
# convert
|
# convert
|
||||||
python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-7b-hf -l loras/tloen_alpaca-lora-7b
|
python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-7b-hf -l loras/tloen_alpaca-lora-7b
|
||||||
python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-13b-hf -l loras/chansung_alpaca-lora-13b # crash! 50GB+ needed
|
python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-13b-hf -l loras/chansung_alpaca-lora-13b # crash! 50GB+ needed
|
||||||
python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-30b-hf -l loras/chansung_alpaca-lora-30b
|
python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-30b-hf -l loras/chansung_alpaca-lora-30b
|
||||||
python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-60b-hf -l loras/chansung_alpaca-lora-60b
|
python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-60b-hf -l loras/chansung_alpaca-lora-60b
|
||||||
|
|
||||||
# test
|
# test
|
||||||
python scripts/test_01_delora.py models/tloen_alpaca-lora-7b-delorified
|
python scripts/test_01_delora.py models/tloen_alpaca-lora-7b-delorified
|
||||||
@@ -66,6 +66,7 @@ python scripts/test_01_delora.py models/chansung_alpaca-lora-13b-delorified
|
|||||||
# now compare what was generated during conversion (`test_prompts.txt`) with the output of the loaded version
|
# now compare what was generated during conversion (`test_prompts.txt`) with the output of the loaded version
|
||||||
|
|
||||||
# 4bit...
|
# 4bit...
|
||||||
|
CUDA_VISIBLE_DEVICES=0 python llama.py ./models/tloen_alpaca-lora-7b-delorified c4 --wbits 4 --true-sequential --act-order --groupsize 128 --save_safetensors ./models/tloen_alpaca-lora-7b-delorified/llama7b-4bit-128g.safetensors
|
||||||
|
|
||||||
# ggml conversion...
|
# ggml conversion...
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -14,17 +14,6 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None):
|
|||||||
if output_path is None:
|
if output_path is None:
|
||||||
output_path = 'models/' + LORA_MODEL.split('/')[-1] + '-delorified'
|
output_path = 'models/' + LORA_MODEL.split('/')[-1] + '-delorified'
|
||||||
|
|
||||||
# BASE_MODEL = os.environ.get("BASE_MODEL", None)
|
|
||||||
# assert (
|
|
||||||
# BASE_MODEL
|
|
||||||
# ), "Please specify a value for BASE_MODEL environment variable, e.g. `export BASE_MODEL=huggyllama/llama-7b`" # noqa: E501
|
|
||||||
|
|
||||||
|
|
||||||
# LORA_MODEL = os.environ.get("BASE_MODEL", None)
|
|
||||||
# assert (
|
|
||||||
# LORA_MODEL
|
|
||||||
# ), "Please specify a value for LORA_MODEL environment variable, e.g. `export BASE_MODEL=tloen/alpaca-lora-7b`" # noqa: E501
|
|
||||||
|
|
||||||
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
|
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)
|
||||||
|
|
||||||
base_model = LlamaForCausalLM.from_pretrained(
|
base_model = LlamaForCausalLM.from_pretrained(
|
||||||
@@ -33,8 +22,6 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None):
|
|||||||
torch_dtype=torch.float16,
|
torch_dtype=torch.float16,
|
||||||
device_map={"": "cpu"},
|
device_map={"": "cpu"},
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO or load 4 bit?
|
|
||||||
|
|
||||||
first_weight = base_model.model.layers[0].self_attn.q_proj.weight
|
first_weight = base_model.model.layers[0].self_attn.q_proj.weight
|
||||||
first_weight_old = first_weight.clone()
|
first_weight_old = first_weight.clone()
|
||||||
@@ -70,9 +57,8 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None):
|
|||||||
LlamaForCausalLM.save_pretrained(
|
LlamaForCausalLM.save_pretrained(
|
||||||
base_model, output_path, state_dict=deloreanized_sd, max_shard_size="400MB"
|
base_model, output_path, state_dict=deloreanized_sd, max_shard_size="400MB"
|
||||||
)
|
)
|
||||||
print(f'output {output_path}')
|
|
||||||
LlamaTokenizer.save_pretrained(tokenizer, output_path)
|
LlamaTokenizer.save_pretrained(tokenizer, output_path)
|
||||||
# FIXME also save tokenizer
|
print(f'output {output_path}')
|
||||||
|
|
||||||
from alpaca_convert.test import test_conversation
|
from alpaca_convert.test import test_conversation
|
||||||
o = test_conversation(lora_model.float(), tokenizer)
|
o = test_conversation(lora_model.float(), tokenizer)
|
||||||
@@ -83,10 +69,10 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None):
|
|||||||
|
|
||||||
if __name__=="__main__":
|
if __name__=="__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('model', type=str)
|
parser.add_argument('model', type=str, help='Huggingface model e.g. `./models/decapoda-research_llama-7b-hf`')
|
||||||
parser.add_argument('-l', '--lora', type=str, default='main', help='Lora repo or path e.g. `tloen/alpaca-lora-7b`')
|
parser.add_argument('-l', '--lora', type=str, default='main', help='Lora repo or path e.g. `tloen/alpaca-lora-7b` or `./loras/tloen_alpaca-lora-7b`')
|
||||||
parser.add_argument('-o', '--output', type=Path, default=None)
|
parser.add_argument('-o', '--output', type=Path, default=None)
|
||||||
"e.g. ./hf_ckpt. default will be lora name"
|
"if None, the default will be `models/tloen_alpaca-lora-7b-delorified` or similar"
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
main(args.model, args.lora, args.output)
|
main(args.model, args.lora, args.output)
|
||||||
|
|
||||||
Reference in New Issue
Block a user