diff --git a/generate.py b/generate.py new file mode 100644 index 0000000..028ba43 --- /dev/null +++ b/generate.py @@ -0,0 +1,28 @@ +import torch +from transformers import pipeline +import json +import warnings + +model_id = "princeton-nlp/Llama-3-Instruct-8B-SimPO" + +with open('chat_templates.json', 'r') as f: + chat_templates = json.load(f) + +if "llama-3" in model_id.lower(): + template = chat_templates["llama3"] +elif "mistral-7b-base" in model_id.lower(): + template = chat_templates["mistral-base"] +elif "mistral-7b-instruct" in model_id.lower(): + template = chat_templates["mistral-instruct"] +else: + warnings.warn("No template set for the given model_id.") + +generator = pipeline( + "text-generation", + model=model_id, + model_kwargs={"torch_dtype": torch.bfloat16}, + device="cuda", +) +generator.tokenizer.chat_template = template +outputs = generator([{"role": "user", "content": "What's the difference between llamas and alpacas?"}], do_sample=False, max_new_tokens=200) +print(outputs[0]['generated_text'])