diff --git a/mjc_notes.md b/mjc_notes.md index 30f5218..5565460 100644 --- a/mjc_notes.md +++ b/mjc_notes.md @@ -55,10 +55,10 @@ python scripts/download-model.py tloen/alpaca-lora-7b python scripts/download-model.py decapoda-research/llama-7b-hf # convert -python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-7b-hf -l loras/tloen_alpaca-lora-7b -python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-13b-hf -l loras/chansung_alpaca-lora-13b # crash! 50GB+ needed -python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-30b-hf -l loras/chansung_alpaca-lora-30b -python scripts/export_hf_checkpoint.py ./models/decapoda-research_llama-60b-hf -l loras/chansung_alpaca-lora-60b +python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-7b-hf -l loras/tloen_alpaca-lora-7b +python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-13b-hf -l loras/chansung_alpaca-lora-13b # crash! 50GB+ needed +python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-30b-hf -l loras/chansung_alpaca-lora-30b +python scripts/01_export_hf_checkpoint.py ./models/decapoda-research_llama-60b-hf -l loras/chansung_alpaca-lora-60b # test python scripts/test_01_delora.py models/tloen_alpaca-lora-7b-delorified @@ -66,6 +66,7 @@ python scripts/test_01_delora.py models/chansung_alpaca-lora-13b-delorified # now compare what was generated during conversion `test_prompts.txt`, to the loaded version # 4bit... +CUDA_VISIBLE_DEVICES=0 python llama.py ./models/tloen_alpaca-lora-7b-delorified c4 --wbits 4 --true-sequential --act-order --groupsize 128 --save_safetensors ./models/tloen_alpaca-lora-7b-delorified/llama7b-4bit-128g.safetensors # ggml conversion... 
``` diff --git a/scripts/export_hf_checkpoint.py b/scripts/01_export_hf_checkpoint.py similarity index 77% rename from scripts/export_hf_checkpoint.py rename to scripts/01_export_hf_checkpoint.py index ac89e23..b959890 100644 --- a/scripts/export_hf_checkpoint.py +++ b/scripts/01_export_hf_checkpoint.py @@ -14,17 +14,6 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None): if output_path is None: output_path = 'models/' + LORA_MODEL.split('/')[-1] + '-delorified' - # BASE_MODEL = os.environ.get("BASE_MODEL", None) - # assert ( - # BASE_MODEL - # ), "Please specify a value for BASE_MODEL environment variable, e.g. `export BASE_MODEL=huggyllama/llama-7b`" # noqa: E501 - - - # LORA_MODEL = os.environ.get("BASE_MODEL", None) - # assert ( - # LORA_MODEL - # ), "Please specify a value for LORA_MODEL environment variable, e.g. `export BASE_MODEL=tloen/alpaca-lora-7b`" # noqa: E501 - tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL) base_model = LlamaForCausalLM.from_pretrained( @@ -33,8 +22,6 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None): torch_dtype=torch.float16, device_map={"": "cpu"}, ) - - # TODO or load 4 bit? first_weight = base_model.model.layers[0].self_attn.q_proj.weight first_weight_old = first_weight.clone() @@ -70,9 +57,8 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None): LlamaForCausalLM.save_pretrained( base_model, output_path, state_dict=deloreanized_sd, max_shard_size="400MB" ) - print(f'output {output_path}') LlamaTokenizer.save_pretrained(tokenizer, output_path) - # FIXME also save tokenizer + print(f'output {output_path}') from alpaca_convert.test import test_conversation o = test_conversation(lora_model.float(), tokenizer) @@ -83,10 +69,10 @@ def main(BASE_MODEL, LORA_MODEL, output_path=None): if __name__=="__main__": parser = argparse.ArgumentParser() - parser.add_argument('model', type=str) - parser.add_argument('-l', '--lora', type=str, default='main', help='Lora repo or path e.g. 
`tloen/alpaca-lora-7b`') + parser.add_argument('model', type=str, help='Huggingface model e.g. `./models/decapoda-research_llama-7b-hf`') + parser.add_argument('-l', '--lora', type=str, default='main', help='Lora repo or path e.g. `tloen/alpaca-lora-7b` or `./loras/tloen_alpaca-lora-7b`') parser.add_argument('-o', '--output', type=Path, default=None) - "e.g. ./hf_ckpt. default will be lora name" + "if None default will be `./models/tloen_alpaca-lora-7b-delorified` or similar" args = parser.parse_args() main(args.model, args.lora, args.output)