cuda

2026-06-27 16:17:59 +08:00 · 2026-05-02 06:04:58 +08:00
parent 4f2034dd46
commit 0bc46dc51e
10 changed files with 11 additions and 11 deletions
@@ -32,7 +32,7 @@ def main():
    tok = AutoTokenizer.from_pretrained(MODEL)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
-    model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="cuda")
    model.eval()

    calib = pl.read_csv("out/honesty/kl_calibration/summary.csv")
@@ -67,7 +67,7 @@ def main():
    tok = AutoTokenizer.from_pretrained(MODEL)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
-    model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16, device_map="cuda")
    model.eval()

    # Load calibrated alphas
@@ -426,7 +426,7 @@ def generate_pairs(cfg: DataCfg) -> Path:
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto"
+        cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda"
    )
    model.eval()

@@ -298,7 +298,7 @@ def evaluate(cfg: AIRiskCfg, w: dict[str, Tensor],
            tok.pad_token = tok.eos_token
    if model is None:
        model = AutoModelForCausalLM.from_pretrained(
-            cfg.model_id, dtype=torch.bfloat16, device_map="auto"
+            cfg.model_id, dtype=torch.bfloat16, device_map="cuda"
        )
        model.eval()

@@ -90,7 +90,7 @@ def evaluate(cfg: EvalCfg, w: dict[str, Tensor]) -> pl.DataFrame:
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto"
+        cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda"
    )
    model.eval()

@@ -288,7 +288,7 @@ def run_eval(cfg: TinyMFVAiriskCfg) -> tuple[pl.DataFrame, pl.DataFrame, pl.Data
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    tok.padding_side = "left"
-    model = AutoModelForCausalLM.from_pretrained(cfg.model, torch_dtype=torch.bfloat16, device_map="auto")
+    model = AutoModelForCausalLM.from_pretrained(cfg.model, torch_dtype=torch.bfloat16, device_map="cuda")
    model.eval()

    vignettes = _load_vignettes(cfg.limit)
@@ -360,7 +360,7 @@ def main(cfg: KLCalibrateCfg) -> None:
        tok.pad_token = tok.eos_token
    tok.padding_side = "left"
    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model, dtype=torch.bfloat16, device_map="auto"
+        cfg.model, dtype=torch.bfloat16, device_map="cuda"
    )
    model.eval()

@@ -85,7 +85,7 @@ def phase_a1(cfg: Cfg, claims: list[tuple[str, str]], tok) -> None:
        adapter_path = cfg.out / cfg.behavior / cfg.adapter / sign
        logger.info(f"loading {sign} adapter from {adapter_path}")
        base = AutoModelForCausalLM.from_pretrained(
-            cfg.model, torch_dtype=torch.bfloat16, device_map="auto"
+            cfg.model, torch_dtype=torch.bfloat16, device_map="cuda"
        )
        model = PeftModel.from_pretrained(base, str(adapter_path))
        model.eval()
@@ -106,7 +106,7 @@ def phase_a2(cfg: Cfg, claims: list[tuple[str, str]], tok) -> pl.DataFrame:
    w = load_diff(w_path)

    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model, torch_dtype=torch.bfloat16, device_map="auto"
+        cfg.model, torch_dtype=torch.bfloat16, device_map="cuda"
    )
    model.eval()
    choice_ids = get_choice_ids(tok)
@@ -65,7 +65,7 @@ def main(cfg: PersonaDebugCfg) -> None:
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model, dtype=torch.bfloat16, device_map="auto"
+        cfg.model, dtype=torch.bfloat16, device_map="cuda"
    )
    model.eval()

@@ -156,7 +156,7 @@ def train_adapter(cfg: TrainCfg, ds: Dataset) -> Path:
        tok.pad_token = tok.eos_token

    model = AutoModelForCausalLM.from_pretrained(
-        cfg.model_id, torch_dtype=torch.bfloat16, device_map="auto"
+        cfg.model_id, torch_dtype=torch.bfloat16, device_map="cuda"
    )
    model.config.use_cache = False