import torch from transformers import pipeline model_id = "princeton-nlp/gemma-2-9b-it-SimPO" generator = pipeline( "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device="cuda", ) outputs = generator([{"role": "user", "content": "What's the difference between llamas and alpacas?"}], do_sample=False, max_new_tokens=200) print(outputs[0]['generated_text'])