From c3af6cc03c03ad7b7d65ac98ae78b1f714fab0ff Mon Sep 17 00:00:00 2001
From: wassname <1103714+wassname@users.noreply.github.com>
Date: Wed, 10 Jun 2026 05:26:51 +0000
Subject: [PATCH] rename: deployed/as_trained policy views, kill 'knob' (schema
 paired_final_v2)

Disambiguate the overloaded deploy/train/knob vocabulary (paper-consistent:
'quarantine' + 'ablated' + 'deployed' all match Cloud et al.). Each term now
has exactly one opposite:
- policy view: hack_deployed/solve_deployed (quarantine ablated, ships) vs
  hack_as_trained/solve_as_trained (quarantine attached). Unifies the old split
  deploy_hack (JSON) vs hack_deploy (table key) into one name.
- 'knob' -> 'quarantine'/'adapter' in the touched comments and log strings
  (one 'knob-ON' comment in plot_floor_ceiling.py is left as-is).
- train/test reserved for the DATA split only.
Bump RUN_SCHEMA v1->v2 so old deploy_test.json files are skipped by
completed_runs instead of crashing it. CLI flags untouched (queued jobs
unaffected). Fixed two replace_all collision bugs (hack_deploy is a substring
of hack_deployed, yielding '...deployeded') and the missed eval_curve writer
(eval_checkpoint_curve.py) + readers (results_deploy.py). Also adds the
routeV_per_token -> routingV_per_token entry to ARM in run_artifacts.py.
Smoke test green: v2 written + read; gates pass.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
---
 scripts/eval_checkpoint_curve.py |  4 +--
 scripts/plot_floor_ceiling.py    | 44 ++++++++++++++++----------------
 scripts/rescore_deploy.py        |  6 ++---
 scripts/results_deploy.py        | 16 ++++++------
 src/vgrout/run_artifacts.py      | 13 +++++-----
 src/vgrout/tablelog.py           | 12 +++------
 src/vgrout/train.py              | 30 +++++++++++-----------
 7 files changed, 61 insertions(+), 64 deletions(-)

diff --git a/scripts/eval_checkpoint_curve.py b/scripts/eval_checkpoint_curve.py
index ee10cb2..00ff9bd 100644
--- a/scripts/eval_checkpoint_curve.py
+++ b/scripts/eval_checkpoint_curve.py
@@ -80,8 +80,8 @@ def main(run_dir: Positional[Path]) -> None:
         else:
             deploy = train
         row = {"updates_completed": updates, "n": deploy["n"],
-               "train_hack": train["hack"], "train_solve": train["solve"],
-               "deploy_hack": deploy["hack"], "deploy_solve": deploy["solve"]}
+               "hack_as_trained": train["hack"], "solve_as_trained": train["solve"],
+               "hack_deployed": deploy["hack"], "solve_deployed": deploy["solve"]}
         with out_path.open("a") as f:
             f.write(json.dumps(row) + "\n")
         logger.info(row)
diff --git a/scripts/plot_floor_ceiling.py b/scripts/plot_floor_ceiling.py
index 5289226..a32c838 100644
--- a/scripts/plot_floor_ceiling.py
+++ b/scripts/plot_floor_ceiling.py
@@ -80,31 +80,31 @@ def build_csv() -> pl.DataFrame:
         ev = [json.loads(l) for l in (run / "eval_curve.jsonl").read_text().splitlines()]
         rows.append(dict(
             label=label, kind="method",
-            hack_deploy=round(dep["deploy_hack"], 4), solve_deploy=round(dep["deploy_solve"], 4),
+            hack_deployed=round(dep["hack_deployed"], 4), solve_deployed=round(dep["solve_deployed"], 4),
             # knob-ON deploy (deployed-as-trained) on the SAME n=119 set -- None until backfilled
             # (rescore_deploy.py) so the deploy before->after is honest, not borrowed from val.
-            hack_deploy_on=_r4(dep.get("deploy_hack_on")), solve_deploy_on=_r4(dep.get("deploy_solve_on")),
-            hack_on=round(_l5(ev, "train_hack"), 4),  hack_off=round(_l5(ev, "deploy_hack"), 4),
-            solve_on=round(_l5(ev, "train_solve"), 4), solve_off=round(_l5(ev, "deploy_solve"), 4),
+            hack_as_trained=_r4(dep.get("hack_as_trained")), solve_as_trained=_r4(dep.get("solve_as_trained")),
+            hack_on=round(_l5(ev, "hack_as_trained"), 4),  hack_off=round(_l5(ev, "hack_deployed"), 4),
+            solve_on=round(_l5(ev, "solve_as_trained"), 4), solve_off=round(_l5(ev, "solve_deployed"), 4),
             source=f"{run.name}/[deploy_test.json + eval_curve.jsonl]", status=status))
 
     base = json.loads((_find_run("_dir8_baseline_s43") / "deploy_test.json").read_text())
     rows.append(dict(label="base (floor)", kind="anchor_floor",
-                     hack_deploy=round(base["deploy_hack"], 4), solve_deploy=round(base["deploy_solve"], 4),
-                     hack_deploy_on=None, solve_deploy_on=None,
+                     hack_deployed=round(base["hack_deployed"], 4), solve_deployed=round(base["solve_deployed"], 4),
+                     hack_as_trained=None, solve_as_trained=None,
                      hack_on=None, hack_off=None, solve_on=None, solve_off=None,
                      source="*_dir8_baseline_s43/deploy_test.json", status="ok (base model; steps=0)"))
 
     ceil_path = next(RUNS.glob("*noloophole*/deploy_test.json"), None)
     if ceil_path:
-        ceil_solve, status = round(json.loads(ceil_path.read_text())["deploy_solve"], 4), "ok"
+        ceil_solve, status = round(json.loads(ceil_path.read_text())["solve_deployed"], 4), "ok"
         source = f"{ceil_path.parent.name}/deploy_test.json"
     else:
         ceil_solve, status = PAPER_CEILING, "FIXME: PROVISIONAL paper 0.223 -- awaiting job 24 (no-loophole ceiling)"
         source = "Ariahw et al. 2025 (paper), NOT our run"
     rows.append(dict(label="ceiling", kind="anchor_ceiling",
-                     hack_deploy=0.0, solve_deploy=ceil_solve,
-                     hack_deploy_on=None, solve_deploy_on=None,
+                     hack_deployed=0.0, solve_deployed=ceil_solve,
+                     hack_as_trained=None, solve_as_trained=None,
                      hack_on=None, hack_off=None, solve_on=None, solve_off=None,
                      source=source, status=status))
 
@@ -135,9 +135,9 @@ GOLD, DARK = "#c8920a", "#3a3a3a"
 def _anchors(df: pl.DataFrame) -> dict:
     g = lambda kind, col: df.filter(pl.col("kind") == kind)[col][0]
     ceil_status = g("anchor_ceiling", "status")
-    return dict(base_solve=g("anchor_floor", "solve_deploy"),
-                vanilla_hack=df.filter(pl.col("label") == "vanilla GRPO")["hack_deploy"][0],
-                ceiling=g("anchor_ceiling", "solve_deploy"),
+    return dict(base_solve=g("anchor_floor", "solve_deployed"),
+                vanilla_hack=df.filter(pl.col("label") == "vanilla GRPO")["hack_deployed"][0],
+                ceiling=g("anchor_ceiling", "solve_deployed"),
                 provisional=ceil_status.startswith("FIXME"))
 
 
@@ -166,8 +166,8 @@ def plot(df: pl.DataFrame) -> None:
     pick = lambda lab: df.filter(pl.col("label") == lab).to_dicts()[0]
     best, rand, van = pick("routeV per-token"), pick("routeV random-V"), pick("vanilla GRPO")
 
-    def hsupp(r): return (vh - r["hack_deploy"]) / vh
-    def suplift(r): return (r["solve_deploy"] - base) / (ceil - base)
+    def hsupp(r): return (vh - r["hack_deployed"]) / vh
+    def suplift(r): return (r["solve_deployed"] - base) / (ceil - base)
 
     # OURS ONLY -- no paper bars. The paper comparison is cross-scale/regime (their converged
     # full-env vs our 60-step fast surrogate) so it can only ever be directional; the paper
@@ -175,14 +175,14 @@ def plot(df: pl.DataFrame) -> None:
     # vanilla is the floor anchor (defines vh, so its hack-suppression is 0 by construction);
     # random-V is the directionality control; per-token is the live arm.
     hack_rows = [
-        ("vanilla GRPO\n(floor)",                hsupp(van),  f"{van['hack_deploy']:.3f}",  RED),
-        ("routeV random-V\n(direction control)", hsupp(rand), f"{rand['hack_deploy']:.3f}", DARK),
-        ("routeV per-token\n(best)",             hsupp(best), f"{best['hack_deploy']:.3f}", GOLD),
+        ("vanilla GRPO\n(floor)",                hsupp(van),  f"{van['hack_deployed']:.3f}",  RED),
+        ("routeV random-V\n(direction control)", hsupp(rand), f"{rand['hack_deployed']:.3f}", DARK),
+        ("routeV per-token\n(best)",             hsupp(best), f"{best['hack_deployed']:.3f}", GOLD),
     ]
     solve_rows = [
-        ("vanilla GRPO\n(floor)",                suplift(van),  f"{van['solve_deploy']:.3f}",  RED),
-        ("routeV random-V\n(direction control)", suplift(rand), f"{rand['solve_deploy']:.3f}", DARK),
-        ("routeV per-token\n(best)",             suplift(best), f"{best['solve_deploy']:.3f}", GOLD),
+        ("vanilla GRPO\n(floor)",                suplift(van),  f"{van['solve_deployed']:.3f}",  RED),
+        ("routeV random-V\n(direction control)", suplift(rand), f"{rand['solve_deployed']:.3f}", DARK),
+        ("routeV per-token\n(best)",             suplift(best), f"{best['solve_deployed']:.3f}", GOLD),
     ]
     prov = " (ceiling PROVISIONAL=0.223, FIXME job 24)" if a["provisional"] else ""
     fig, (axl, axr) = plt.subplots(1, 2, figsize=(11.5, 5.0), sharey=False)
@@ -225,7 +225,7 @@ def _methods(df: pl.DataFrame) -> list[dict]:
 def plot_scatter(df: pl.DataFrame) -> None:
     a = _anchors(df)
     base, ceil = a["base_solve"], a["ceiling"]
-    H = lambda r: r["hack_deploy"]; S = lambda r: r["solve_deploy"]
+    H = lambda r: r["hack_deployed"]; S = lambda r: r["solve_deployed"]
     prov = "*" if a["provisional"] else ""
 
     fig, ax = plt.subplots(figsize=(7.2, 5.4))
@@ -244,7 +244,7 @@ def plot_scatter(df: pl.DataFrame) -> None:
     # not an eval-set artifact. Arms without the backfill fall back to dot-only.
     for r in _methods(df):
         col = ARM_COLOR.get(r["label"], GREY)
-        hon, son = r["hack_deploy_on"], r["solve_deploy_on"]
+        hon, son = r["hack_as_trained"], r["solve_as_trained"]
         if hon is not None and (abs(hon - H(r)) > 1e-6 or abs(son - S(r)) > 1e-6):
             ax.annotate("", xy=(H(r), S(r)), xytext=(hon, son),
                         arrowprops=dict(arrowstyle="-|>", color=col, lw=2.0, alpha=0.85, shrinkA=6, shrinkB=8))
diff --git a/scripts/rescore_deploy.py b/scripts/rescore_deploy.py
index a5f2f4a..4b565ba 100644
--- a/scripts/rescore_deploy.py
+++ b/scripts/rescore_deploy.py
@@ -66,9 +66,9 @@ def main(run_dir: Positional[Path]) -> None:
         "schema": RUN_SCHEMA,
         "run_dir": run_dir.name, "model": model_name, "step": meta.get("step"),
         "eval_set": "test", "eval_modes": eval_modes,
-        "n": ev["n"], "deploy_hack": ev["hack"], "deploy_vhack": ev["vhack"], "deploy_solve": ev["solve"],
-        "deploy_hack_on": ev_on["hack"], "deploy_vhack_on": ev_on["vhack"],
-        "deploy_solve_on": ev_on["solve"],
+        "n": ev["n"], "hack_deployed": ev["hack"], "vhack_deployed": ev["vhack"], "solve_deployed": ev["solve"],
+        "hack_as_trained": ev_on["hack"], "vhack_as_trained": ev_on["vhack"],
+        "solve_as_trained": ev_on["solve"],
         "by_mode": {m: {"hack": h / max(1, c), "vhack": v / max(1, c), "solve": s / max(1, c), "n": c}
                     for m, (h, v, s, c) in ev["by_mode"].items()},
     }
diff --git a/scripts/results_deploy.py b/scripts/results_deploy.py
index d621841..00038d0 100644
--- a/scripts/results_deploy.py
+++ b/scripts/results_deploy.py
@@ -1,4 +1,4 @@
-"""Final paired knob-off/knob-on scores from completed structured run artifacts."""
+"""Final paired deployed/as-trained scores from completed structured run artifacts."""
 from __future__ import annotations
 
 import polars as pl
@@ -15,11 +15,11 @@ def main() -> None:
             continue
         rows.append({
             "time": run["time"],
-            "headline": deploy["deploy_solve"] - deploy["deploy_hack"],
-            "hack_off": deploy["deploy_hack"],
-            "solve_off": deploy["deploy_solve"],
-            "hack_on": deploy["deploy_hack_on"],
-            "solve_on": deploy["deploy_solve_on"],
+            "headline": deploy["solve_deployed"] - deploy["hack_deployed"],
+            "hack_deployed": deploy["hack_deployed"],
+            "solve_deployed": deploy["solve_deployed"],
+            "hack_as_trained": deploy["hack_as_trained"],
+            "solve_as_trained": deploy["solve_as_trained"],
             "select": route_selectivity(run["run_dir"]),
             "arm": run["arm"],
             "pair": cfg["vhack_pairs_path"].split("/")[-1].removesuffix(".json"),
@@ -35,10 +35,10 @@ def main() -> None:
         print("no completed non-smoke runs in out/runs/")
         return
     df = pl.DataFrame(rows).sort("headline", descending=True)
-    cols = ["time", "headline", "hack_off", "solve_off", "hack_on", "solve_on",
+    cols = ["time", "headline", "hack_deployed", "solve_deployed", "hack_as_trained", "solve_as_trained",
             "select", "arm", "pair", "seed", "hack_train", "solve_train", "model",
             "n", "modes", "run"]
-    print("\n## Final paired test eval, sorted by knob-off solve-hack\n")
+    print("\n## Final paired test eval, sorted by deployed solve-hack\n")
     print(tabulate(df.select(cols).rows(), headers=cols, tablefmt="pipe", floatfmt="+.3f"))
 
 
diff --git a/src/vgrout/run_artifacts.py b/src/vgrout/run_artifacts.py
index 03b1cec..a8463e5 100644
--- a/src/vgrout/run_artifacts.py
+++ b/src/vgrout/run_artifacts.py
@@ -8,8 +8,9 @@ from safetensors import safe_open
 
 
 RUNS_DIR = Path("out/runs")
-RUN_SCHEMA = "paired_final_v1"
-ARM = {"none": "vanilla", "erase": "projected", "routeV": "routingV"}
+RUN_SCHEMA = "paired_final_v2"   # v2: deployed/as_trained field names (was deploy_*/deploy_*_on)
+ARM = {"none": "vanilla", "erase": "projected",
+       "routeV": "routingV", "routeV_per_token": "routingV_per_token"}
 
 
 def _mean_fraction(rows: list[dict], key: str) -> float:
@@ -29,7 +30,7 @@ def load_run(run_dir: Path) -> dict:
     deploy = json.loads(deploy_path.read_text())
     if deploy.get("schema") != RUN_SCHEMA:
         raise ValueError(f"{deploy_path}: expected schema={RUN_SCHEMA}, got {deploy.get('schema')}")
-    required_deploy = {"eval_modes", "n", "deploy_hack", "deploy_solve", "deploy_hack_on", "deploy_solve_on"}
+    required_deploy = {"eval_modes", "n", "hack_deployed", "solve_deployed", "hack_as_trained", "solve_as_trained"}
     missing = required_deploy - deploy.keys()
     if missing:
         raise ValueError(f"{deploy_path}: missing fields {sorted(missing)}")
@@ -61,8 +62,8 @@ def route_selectivity(run_dir: Path) -> float | None:
         return None
     rows = [json.loads(line) for line in curve.read_text().splitlines()][-5:]
     mean = lambda key: sum(row[key] for row in rows) / len(rows)
-    hack_on, solve_on = mean("train_hack"), mean("train_solve")
+    hack_on, solve_on = mean("hack_as_trained"), mean("solve_as_trained")
     if hack_on == 0 or solve_on == 0:
         return None
-    return round((hack_on - mean("deploy_hack")) / hack_on
-                 - (solve_on - mean("deploy_solve")) / solve_on, 3)
+    return round((hack_on - mean("hack_deployed")) / hack_on
+                 - (solve_on - mean("solve_deployed")) / solve_on, 3)
diff --git a/src/vgrout/tablelog.py b/src/vgrout/tablelog.py
index dcc67dd..2e23417 100644
--- a/src/vgrout/tablelog.py
+++ b/src/vgrout/tablelog.py
@@ -84,15 +84,11 @@ class StepLogger:
             _Col("gt_t",   6, "gt_t",    "frac", "teacher ground-truth passes (sanity)"),
             _Col("hack_s", 7, "hack_s?", "frac", "student hack-flagged rollouts (the headline)"),
             _Col("hack_t", 7, "hack_t",  "frac", "teacher hack-flagged rollouts (sanity: pool hacks)"),
-            # Deploy-eval shown for EVERY arm (nan on steps it's not run -> see it ride
-            # along as training proceeds). routeV: quarantine knob OFF. vanilla/erase:
-            # the trained model itself. Apples-to-apples knob-off deploy number, the plot series.
-            _Col("hack_deploy",  7, "hk_dep",  "+.2f", "DEPLOY-eval hack (routeV: quarantine OFF; vanilla/erase: trained model); held-out subset, T=0.7, every eval_ablate_every steps; nan between"),
-            _Col("solve_deploy", 7, "slv_dep", "+.2f", "DEPLOY-eval solve (same cadence; nan between)"),
+            # Held-out deployed evaluation with quarantine ablated; NaN between evaluation steps.
+            _Col("hack_deployed",  7, "hk_dep",  "+.2f", "DEPLOY-eval hack (routeV: quarantine OFF; vanilla/erase: trained model); held-out subset, T=0.7, every eval_ablate_every steps; nan between"),
+            _Col("solve_deployed", 7, "slv_dep", "+.2f", "DEPLOY-eval solve (same cadence; nan between)"),
         ]
-        # Per-mode CUMULATIVE student exploit rate -> which loophole classes the
-        # student has learnt, and how strongly. Only when the run spans >1 mode
-        # (the substrate); single-mode runs would just duplicate hack_s.
+        # Multi-mode runs show current-step hacks per environment; single-mode would duplicate hack_s.
         self._modes = modes if len(modes) > 1 else []
         for m in self._modes:
             cols.append(_Col(f"hk_{mode_code[m]}", 5, f"hk_{mode_code[m]}", "d",
diff --git a/src/vgrout/train.py b/src/vgrout/train.py
index e75b518..ea172f7 100644
--- a/src/vgrout/train.py
+++ b/src/vgrout/train.py
@@ -1330,7 +1330,7 @@ def main(cfg: Config) -> int:
             refr = f"{len(v_hack)}/{sum(V.shape[0] for V in v_hack.values())}"  # mod/axes -> per-step row
 
         # Evaluate every arm on the same held-out validation prompts and sampling seed.
-        hack_deploy = solve_deploy = float("nan")
+        hack_deployed = solve_deployed = float("nan")
         if cfg.eval_ablate_every > 0 and (step % cfg.eval_ablate_every == 0 or step == steps - 1):
             _was_training = model.training
             model.eval()
@@ -1351,23 +1351,23 @@ def main(cfg: Config) -> int:
             torch.set_rng_state(_cpu_rng)
             if _cuda_rng is not None:
                 torch.cuda.set_rng_state_all(_cuda_rng)
-            hack_deploy, solve_deploy = ev_dp["hack"], ev_dp["solve"]
+            hack_deployed, solve_deployed = ev_dp["hack"], ev_dp["solve"]
             if _was_training:
                 model.train()
             with eval_curve_path.open("a") as f:
                 f.write(json.dumps({
                     "step": step, "n": ev_dp["n"], "split": "val",
-                    "train_hack": ev_tr["hack"], "train_vhack": ev_tr["vhack"], "train_solve": ev_tr["solve"],
-                    "deploy_hack": ev_dp["hack"], "deploy_vhack": ev_dp["vhack"], "deploy_solve": ev_dp["solve"],
+                    "hack_as_trained": ev_tr["hack"], "vhack_as_trained": ev_tr["vhack"], "solve_as_trained": ev_tr["solve"],
+                    "hack_deployed": ev_dp["hack"], "vhack_deployed": ev_dp["vhack"], "solve_deployed": ev_dp["solve"],
                     "by_mode_deploy": {m: {"hack_n": h, "vhack_n": v, "solve_n": s, "n": c}
                                        for m, (h, v, s, c) in ev_dp["by_mode"].items()},
                 }) + "\n")
-            should = ("deploy hack < train hack (knob holds the cheat); ELSE routing isn't capturing it"
+            should = ("quarantine-ablated hack < quarantine-enabled hack; ELSE routing isn't capturing it"
                       if is_route else "deploy == train (no quarantine)")
             logger.info(
-                f"step {step} VAL-eval (n={ev_dp['n']}): train/knob-on hack={ev_tr['hack']:.3f} "
-                f"solve={ev_tr['solve']:.3f} | deploy/knob-off hack={hack_deploy:.3f} "
-                f"solve={solve_deploy:.3f}.  SHOULD: {should}")
+                f"step {step} VAL-eval (n={ev_dp['n']}): quarantine-enabled hack={ev_tr['hack']:.3f} "
+                f"solve={ev_tr['solve']:.3f} | deployed/quarantine-ablated hack={hack_deployed:.3f} "
+                f"solve={solve_deployed:.3f}.  SHOULD: {should}")
             # High base solve leaves little room for the exploited metric to rise.
             if step == 0 and ev_tr["solve"] >= 0.9:
                 logger.warning(
@@ -1497,8 +1497,8 @@ def main(cfg: Config) -> int:
             # Route deploy-eval (δS_hack=0); NaN except on route eval steps.
             # Appended AFTER refr so results.py's positional GT_S/HACK_S indices
             # are unaffected. plot_dynamics reads it by name.
-            "hack_deploy": hack_deploy,
-            "solve_deploy": solve_deploy,
+            "hack_deployed": hack_deployed,
+            "solve_deployed": solve_deployed,
             # Free per-step deploy proxy from the ablated rollout slice (above).
             "hack_abl": (hack_abl_n, n_abl_step) if n_abl_step else (0, 0),
             "solve_abl": (gt_abl_n, n_abl_step) if n_abl_step else (0, 0),
@@ -1639,9 +1639,9 @@ def main(cfg: Config) -> int:
                                 cfg.eval_batch_size)
     else:
         ev_on = ev
-    logger.info(f"FINAL EVAL [{cfg.arm}] DEPLOY knob-off (held-out test, n={ev['n']}): "
+    logger.info(f"FINAL EVAL [{cfg.arm}] DEPLOY quarantine-ablated (held-out test, n={ev['n']}): "
                 f"hack(strict)={ev['hack']:.3f} hack(vendor eq_hinted)={ev['vhack']:.3f} solve={ev['solve']:.3f}"
-                + (f"  |  knob-on: hack={ev_on['hack']:.3f} solve={ev_on['solve']:.3f}"
+                + (f"  |  quarantine-enabled: hack={ev_on['hack']:.3f} solve={ev_on['solve']:.3f}"
                    if has_quarantine else ""))
     by_mode = {}
     for mode in sorted(ev["by_mode"]):
@@ -1654,9 +1654,9 @@ def main(cfg: Config) -> int:
         "seed": cfg.seed, "steps": n_steps, "model": model_name, "out_tag": cfg.out_tag,
         "eval": cfg.eval, "unhackable_frac": cfg.unhackable_frac, "pairs": str(cfg.vhack_pairs_path.name),
         "eval_set": "test", "eval_modes": eval_modes, "n": ev["n"],
-        "deploy_hack": ev["hack"], "deploy_vhack": ev["vhack"], "deploy_solve": ev["solve"],
-        "deploy_hack_on": ev_on["hack"], "deploy_vhack_on": ev_on["vhack"],
-        "deploy_solve_on": ev_on["solve"],
+        "hack_deployed": ev["hack"], "vhack_deployed": ev["vhack"], "solve_deployed": ev["solve"],
+        "hack_as_trained": ev_on["hack"], "vhack_as_trained": ev_on["vhack"],
+        "solve_as_trained": ev_on["solve"],
         "by_mode": by_mode, "log": str(verbose_log),
     }
     deploy_path = run_dir / "deploy_test.json"