diff --git a/nbs/daily_dilemmas/check_alpha_scaling.py b/nbs/daily_dilemmas/check_alpha_scaling.py index 8d7fbeb..ea9ad0d 100644 --- a/nbs/daily_dilemmas/check_alpha_scaling.py +++ b/nbs/daily_dilemmas/check_alpha_scaling.py @@ -32,7 +32,7 @@ def main(): tok = AutoTokenizer.from_pretrained(MODEL) if tok.pad_token is None: tok.pad_token = tok.eos_token - model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto") + model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="cuda") model.eval() calib = pl.read_csv("out/honesty/kl_calibration/summary.csv") diff --git a/nbs/daily_dilemmas/demo_calibrated_traces.py b/nbs/daily_dilemmas/demo_calibrated_traces.py index ecde2a2..600bbed 100644 --- a/nbs/daily_dilemmas/demo_calibrated_traces.py +++ b/nbs/daily_dilemmas/demo_calibrated_traces.py @@ -67,7 +67,7 @@ def main(): tok = AutoTokenizer.from_pretrained(MODEL) if tok.pad_token is None: tok.pad_token = tok.eos_token - model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto") + model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="cuda") model.eval() # Load calibrated alphas diff --git a/src/ws/data.py b/src/ws/data.py index ba20054..3788333 100644 --- a/src/ws/data.py +++ b/src/ws/data.py @@ -426,7 +426,7 @@ def generate_pairs(cfg: DataCfg) -> Path: if tok.pad_token is None: tok.pad_token = tok.eos_token model = AutoModelForCausalLM.from_pretrained( - cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto" + cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda" ) model.eval() diff --git a/src/ws/eval/airisk.py b/src/ws/eval/airisk.py index 225f8f2..20cd63e 100644 --- a/src/ws/eval/airisk.py +++ b/src/ws/eval/airisk.py @@ -298,7 +298,7 @@ def evaluate(cfg: AIRiskCfg, w: dict[str, Tensor], tok.pad_token = tok.eos_token if model is None: model = AutoModelForCausalLM.from_pretrained( - cfg.model_id, dtype=torch.bfloat16, device_map="auto" + cfg.model_id, dtype=torch.bfloat16, device_map="cuda" ) model.eval() diff --git a/src/ws/eval/sycophancy.py b/src/ws/eval/sycophancy.py index a52133e..9b4165f 100644 --- a/src/ws/eval/sycophancy.py +++ b/src/ws/eval/sycophancy.py @@ -90,7 +90,7 @@ def evaluate(cfg: EvalCfg, w: dict[str, Tensor]) -> pl.DataFrame: if tok.pad_token is None: tok.pad_token = tok.eos_token model = AutoModelForCausalLM.from_pretrained( - cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto" + cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda" ) model.eval() diff --git a/src/ws/eval/tinymfv_airisk.py b/src/ws/eval/tinymfv_airisk.py index 3b35d8a..7415fd3 100644 --- a/src/ws/eval/tinymfv_airisk.py +++ b/src/ws/eval/tinymfv_airisk.py @@ -288,7 +288,7 @@ def run_eval(cfg: TinyMFVAiriskCfg) -> tuple[pl.DataFrame, pl.DataFrame, pl.Data if tok.pad_token is None: tok.pad_token = tok.eos_token tok.padding_side = "left" - model = AutoModelForCausalLM.from_pretrained(cfg.model, torch_dtype=torch.bfloat16, device_map="auto") + model = AutoModelForCausalLM.from_pretrained(cfg.model, torch_dtype=torch.bfloat16, device_map="cuda") model.eval() vignettes = _load_vignettes(cfg.limit) diff --git a/src/ws/kl_calibrate.py b/src/ws/kl_calibrate.py index b53a4a1..984c622 100644 --- a/src/ws/kl_calibrate.py +++ b/src/ws/kl_calibrate.py @@ -360,7 +360,7 @@ def main(cfg: KLCalibrateCfg) -> None: tok.pad_token = tok.eos_token tok.padding_side = "left" model = AutoModelForCausalLM.from_pretrained( - cfg.model, dtype=torch.bfloat16, device_map="auto" + cfg.model, dtype=torch.bfloat16, device_map="cuda" ) model.eval() diff --git a/src/ws/run_demo.py b/src/ws/run_demo.py index 2c440f2..c97a8e0 100644 --- a/src/ws/run_demo.py +++ b/src/ws/run_demo.py @@ -85,7 +85,7 @@ def phase_a1(cfg: Cfg, claims: list[tuple[str, str]], tok) -> None: adapter_path = cfg.out / cfg.behavior / cfg.adapter / sign logger.info(f"loading {sign} adapter from {adapter_path}") base = AutoModelForCausalLM.from_pretrained( - cfg.model, torch_dtype=torch.bfloat16, device_map="auto" + cfg.model, torch_dtype=torch.bfloat16, device_map="cuda" ) model = PeftModel.from_pretrained(base, str(adapter_path)) model.eval() @@ -106,7 +106,7 @@ def phase_a2(cfg: Cfg, claims: list[tuple[str, str]], tok) -> pl.DataFrame: w = load_diff(w_path) model = AutoModelForCausalLM.from_pretrained( - cfg.model, torch_dtype=torch.bfloat16, device_map="auto" + cfg.model, torch_dtype=torch.bfloat16, device_map="cuda" ) model.eval() choice_ids = get_choice_ids(tok) diff --git a/src/ws/scripts/debug_personas.py b/src/ws/scripts/debug_personas.py index 32926fb..b7f2dec 100644 --- a/src/ws/scripts/debug_personas.py +++ b/src/ws/scripts/debug_personas.py @@ -65,7 +65,7 @@ def main(cfg: PersonaDebugCfg) -> None: if tok.pad_token is None: tok.pad_token = tok.eos_token model = AutoModelForCausalLM.from_pretrained( - cfg.model, dtype=torch.bfloat16, device_map="auto" + cfg.model, dtype=torch.bfloat16, device_map="cuda" ) model.eval() diff --git a/src/ws/train.py b/src/ws/train.py index cdb3a36..f6d6a70 100644 --- a/src/ws/train.py +++ b/src/ws/train.py @@ -156,7 +156,7 @@ def train_adapter(cfg: TrainCfg, ds: Dataset) -> Path: tok.pad_token = tok.eos_token model = AutoModelForCausalLM.from_pretrained( - cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto" + cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda" ) model.config.use_cache = False