From 70aa6aa96b02c6d34cdb70674b0ab92834317012 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Sat, 6 Jun 2026 20:30:19 +0800 Subject: [PATCH] modal: parallel GRPO sweep port (image, volume, fan-out launcher) Fire the paper sweep as independent H100/A100-80 containers instead of serial pueue runs. One Volume caches model + svd + out/; train.py runs unmodified (torch 2.7 + Dao flash-attn wheel, code mounted at runtime). Verified: vanilla 60-step reproduces the local baseline. Skill at ~/.claude/skills/modal documents the patterns. Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com> --- modal/README.md | 95 ++++++++++++++++++ modal/app.py | 213 +++++++++++++++++++++++++++++++++++++++++ modal/fetch.py | 42 ++++++++ modal/launch.py | 82 ++++++++++++++++ modal/upload_inputs.py | 54 +++++++++++ pyproject.toml | 1 + uv.lock | 111 ++++++++++++++++++++- 7 files changed, 597 insertions(+), 1 deletion(-) create mode 100644 modal/README.md create mode 100644 modal/app.py create mode 100644 modal/fetch.py create mode 100644 modal/launch.py create mode 100644 modal/upload_inputs.py diff --git a/modal/README.md b/modal/README.md new file mode 100644 index 0000000..8ce0687 --- /dev/null +++ b/modal/README.md @@ -0,0 +1,95 @@ +# Modal port — parallel GRPO runs + +Fan the paper's GRPO sweep (jobs 124-135 of `docs/spec/20260606_job_manifest.md`) +out as independent H100 containers instead of running them serially through +pueue on the one 96GB box. ~12 runs finish in one run's wall-clock instead of ~2 +days. + +General Modal patterns/gotchas (reusable across projects) live in the global +`modal` skill (`~/.claude/skills/modal/SKILL.md`); this dir is its worked example. + +## Files + +- `app.py` — image, Volume, and the `train` / `warm` / `smoke` GPU functions. +- `upload_inputs.py` — push the gitignored run inputs (pairsets, vhack, pools) to + the Volume. Run from a box that has them. 
+- `launch.py` — fan out the 12-job inventory with `.spawn()`. + +## Design decisions (and why) + +- **GPU = `["H100", "A100-80GB"]` (80GB, fallback list).** The full preset peaked + ~73GB bf16 on the local card, so an 80GB card is required. H100 is ~1.5-2x + faster than A100-80 for ~1.6x the price (≈ same $/run, half the wall-clock). + On a 12-way fan-out H100 capacity can queue, so we fall back to A100-80GB — it + runs the same Dao flash-attn wheel (bundles sm_80) and deploy numbers are + hardware-independent. Override per-run with `VGROUT_GPU=H200` if a long run OOMs. +- **torch 2.7, not the repo's pinned 2.8.** Dao-AILab ships no cp313+torch2.8 + flash-attn wheel; the 2.8.3 line tops out at torch2.7 for cp313. The official + Dao wheel bundles sm_80/86/90 so it runs on A100/H100 — unlike the repo's + Blackwell sm_120-only pin. This keeps train.py's hardcoded `flash_attention_2` + path working with **zero patch to the research code**. +- **No vllm, no causal-conv1d.** Generation is HF `.generate` (nothing in + `src/vgrout` imports vllm); causal-conv1d is only for Qwen3.5's gated-delta-net, + and the model here is standard-attention Qwen3-4B. +- **One Volume `vgrout-cache`** mounts at `/cache` and holds the HF model cache + (`hf/`), the SVD basis cache (`svd_cache/`), and `out/` (uploaded inputs + + written `out/runs/*` artifacts). The model downloads once and the svd_cache + computes once; every later container reuses both. train.py's relative paths + (`svd_cache/`, `out/`, `logs/`) are symlinked onto the Volume from an ephemeral + `/work` cwd. + +## One-time setup + +```bash +pip install modal && modal token new # interactive; you've done this +# Upload the gitignored INPUTS from the box that has them (the 96GB box): +python modal/upload_inputs.py # pushes out/pairsets, out/vhack, out/pools +modal run modal/app.py --action warm # download Qwen3-4B + build svd_cache once +``` + +`upload_inputs.py` skips dirs absent locally. 
The jobs need these on the Volume:
+
+| input | needed by | present on dev box? |
+|---|---|---|
+| `out/pools/substrate`, `out/pools/teacher_pool` | most jobs | yes (uploaded) |
+| `out/pairsets/prog_wide.json` | FastConfig default (124, 127, 130, ...) | **no — only on GPU box** |
+| `out/pairsets/null_city.json` | 128 (erase placebo) | **no — only on GPU box** |
+| `out/vhack/v_hack_a5_runtests.safetensors` | 126, 133, 134 (A5) | **no — only on GPU box** |
+| `out/vhack/v_hack_pairset_prog_wide_randomV.safetensors` | 125 (random-V) | **no — only on GPU box** |
+
+So: run `upload_inputs.py` **from the 96GB box** to get the pairsets/vhack bases
+onto the Volume. (Some vhack bases auto-extract from their pairset if absent, but
+that costs ~5 min GPU per run; uploading the prebuilt ones is cheaper.)
+
+## Verify one run, then fan out
+
+```bash
+modal run modal/app.py --action smoke      # 4-step route2 sanity
+modal run modal/app.py --argv "fast --intervention=route2 --seed=43 --steps=60 --eval-ablate-every=10 --out-tag=_modal_verify"
+# compare its per_mode_deploy.json to the local-box artifact for the same args
+modal run modal/launch.py                  # all 12; or --only 127,134
+```
+
+## Getting the outputs back
+
+Every run writes its full artifact set to the Volume, mirroring the local layout:
+
+- `out/runs/<ts>_<tag>/` — `per_mode_deploy.json`, `train.safetensors`,
+  `first_hack.safetensors`, `rollouts.jsonl`, periodic `ckpt_step*.safetensors`
+- `logs/<ts>_<tag>.log` — the full verbose log
+
+`launch.py` pulls each job's whole run dir + log down to the local `out/runs/` and
+`logs/` as it finishes (so they land exactly where train.py would have written
+them). For ad-hoc runs (warm/smoke/`--argv`) or a full re-sync:
+
+```bash
+python modal/fetch.py                      # all of out/runs + logs
+python modal/fetch.py <ts>_<tag>           # one run
+```
+
+## Caveat — keep the inventory fresh
+
+`launch.py::JOBS` is copied verbatim from the 2026-06-06 manifest.
The live plan +has since evolved (135 → per-token ablation; 136/137 added; n=3 fan-out gated on +the s43 control read). Refresh the argv map from the current manifest / `pueue +status` before the real fan-out — it's just data. diff --git a/modal/app.py b/modal/app.py new file mode 100644 index 0000000..4bc7ce2 --- /dev/null +++ b/modal/app.py @@ -0,0 +1,213 @@ +"""Modal port of the vGROUT GRPO runs (jobs 124-135 of the 2026-06-06 manifest). + +Why: every run currently goes through pueue on the single 96GB box, serially +(~2 days for the 12-run paper sweep). Modal fans them out as independent GPU +containers so the whole sweep finishes in one run's wall-clock. + +Design notes / deliberate choices (see modal/README.md for the runbook): + - GPU = H100 (80GB). The full preset peaked ~73GB bf16 on the local card with + flash-attn; the `fast` preset the manifest uses is lighter. Bump to "H200" + (141GB) here if a long run OOMs. + - torch 2.7 (NOT the repo's pinned 2.8). Dao-AILab ships no cp313+torch2.8 + flash-attn wheel; 2.8.3 tops out at torch2.7 for cp313. The official Dao + wheel bundles sm_80/86/90, so it runs on A100/H100 -- unlike the repo's + Blackwell sm_120-only pin. This keeps train.py's `flash_attention_2` path + working with ZERO patch to the research code. + - No vllm (generation is HF .generate; nothing in src/vgrout imports vllm) and + no causal-conv1d (that wheel is for Qwen3.5's gated-delta-net; the model here + is Qwen3-4B, standard attention). + - One Modal Volume holds the HF model cache, the SVD basis cache, and out/ + (inputs uploaded once via upload_inputs.py, run artifacts written back). + Containers reuse it, so the model downloads once and the svd_cache computes + once. + +Usage: + modal run modal/app.py::warm # download model + build svd_cache once + modal run modal/app.py::smoke # 4-step route2 sanity on the real model + modal run modal/app.py::train --argv "fast --intervention=route2 --seed=43 --steps=60 ..." 
+ modal run modal/launch.py # fan out jobs 124-135 (see launch.py) +""" +from __future__ import annotations + +import os +import shlex +import subprocess +import time +from pathlib import Path + +import modal + +# --------------------------------------------------------------------------- +# Image +# --------------------------------------------------------------------------- +# cp313 to match the repo's python pin (and the flash-attn wheel abi tag). +TORCH = "2.7.1" +FLASH_ATTN_WHL = ( + "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/" + "flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp313-cp313-linux_x86_64.whl" +) + +image = ( + modal.Image.debian_slim(python_version="3.13") + .apt_install("git") + .pip_install( + f"torch=={TORCH}", + index_url="https://download.pytorch.org/whl/cu126", + ) + .pip_install( + # transformers from main: Qwen3 support + the gated-delta-net fix the repo + # pins. Qwen3-4B itself is stable on main. + "transformers @ git+https://github.com/huggingface/transformers.git", + "einops>=0.8", + "jaxtyping>=0.2", + "beartype>=0.18", + "loguru>=0.7", + "polars>=1.0", + "tabulate>=0.9", + "tyro>=0.8", + "tqdm>=4.66", + "numpy<2.0", + "datasets>=3.0", + "huggingface_hub>=0.24", + "wandb>=0.18", + "peft>=0.13", + "flash-linear-attention>=0.5.0", + "safetensors>=0.4", + ) + # flash-attn last, after torch is present (no build isolation -> uses the wheel). + .pip_install(FLASH_ATTN_WHL) + # Research code mounted at runtime so local edits sync without an image rebuild. + # Only src/ is needed on PYTHONPATH; data + caches live on the Volume. Anchored + # to the repo (not CWD) so `modal run` works from any directory. + .add_local_dir(str(Path(__file__).parent.parent / "src"), "/root/src", copy=False) +) + +app = modal.App("vgrout", image=image) + +# Single shared Volume: model cache + svd basis cache + out/ (inputs + artifacts). 
+cache = modal.Volume.from_name("vgrout-cache", create_if_missing=True)
+CACHE = "/cache"
+
+# HF needs a token only for gated repos; Qwen3-4B is public, so a Secret is
+# optional. Hard-disabled by the `if False` below; flip it to attach the Secret so wandb / private mirrors work.
+SECRETS = [modal.Secret.from_name("vgrout-secrets", required_keys=[])] if False else []
+
+# Fallback list: on a 12-way fan-out H100 capacity can queue; A100-80GB is also
+# 80GB and the Dao flash-attn cu12torch2.7 wheel bundles sm_80, so it runs
+# unmodified. Deploy hack/solve numbers are hardware-independent (only wall-clock
+# differs), so mixed hardware doesn't pollute the comparison. Override with
+# VGROUT_GPU=H200 for a job that OOMs on 80GB.
+GPU = os.environ["VGROUT_GPU"] if "VGROUT_GPU" in os.environ else ["H100", "A100-80GB"]
+TIMEOUT = 6 * 60 * 60  # 6h; longest manifest run is 200 steps
+
+
+def _prepare_workdir() -> str:
+    """Point train.py's relative paths (svd_cache/, out/, logs/) at the Volume.
+
+    train.py uses CACHE_ROOT=Path("svd_cache"), OUT_DIR=Path("out"),
+    LOGS_DIR=Path("logs"), all relative to CWD. We run from an ephemeral /work
+    and symlink those three names onto the persistent Volume so the model cache,
+    the SVD basis, the uploaded inputs (out/pairsets, out/pools, out/vhack), and
+    the run artifacts (out/runs/*) all live on /cache. Returns the /work path (the cwd to run from).
+    """
+    for sub in ("svd_cache", "out", "logs", "hf"):
+        Path(f"{CACHE}/{sub}").mkdir(parents=True, exist_ok=True)
+    work = Path("/work")
+    work.mkdir(exist_ok=True)
+    # external/ holds the read-only LeetCode dataset (uploaded to the Volume by
+    # upload_inputs.py); train.py reads it via the relative path
+    # external/rl-rewardhacking/results/data/*.jsonl.
+    for name in ("svd_cache", "out", "logs", "external"):
+        link = work / name
+        if not (link.is_symlink() or link.exists()):  # exists() is False for a dangling link (external/ not uploaded)
+            link.symlink_to(f"{CACHE}/{name}")
+    return str(work)
+
+
+def _run_train(argv: list[str]) -> dict:
+    """Run `python -m vgrout.train <argv>` against the Volume, return the run's
+    per_mode_deploy.json + path + wall-clock.
Fail-fast: nonzero exit raises.""" + work = _prepare_workdir() + env = { + **os.environ, + "PYTHONPATH": "/root/src", + "HF_HOME": f"{CACHE}/hf", + "HF_HUB_DISABLE_PROGRESS_BARS": "1", + "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True", + } + runs_before = set(Path(f"{CACHE}/out/runs").glob("*")) if Path(f"{CACHE}/out/runs").exists() else set() + + t0 = time.time() + print(f"[vgrout] train {' '.join(argv)}", flush=True) + try: + subprocess.run( + ["python", "-m", "vgrout.train", *argv], + cwd=work, env=env, check=True, + ) + finally: + # Persist even on failure: the model download into /cache/hf and the + # svd_cache happen before most failure points, so a crashed run still + # warms those caches for the retry. + cache.commit() + wall_s = time.time() - t0 + + runs_after = set(Path(f"{CACHE}/out/runs").glob("*")) + new_runs = sorted(runs_after - runs_before, key=lambda p: p.stat().st_mtime) + if not new_runs: + raise RuntimeError("train produced no out/runs/ -- did it crash before the run dir was made?") + run_dir = new_runs[-1] + pmd_path = run_dir / "per_mode_deploy.json" + pmd = pmd_path.read_text() if pmd_path.exists() else None + # run_dir.name == the log stem (train.py: run_dir = RUNS_DIR / verbose_log.stem). + log_rel = f"logs/{run_dir.name}.log" + files = sorted(p.name for p in run_dir.iterdir()) + print(f"[vgrout] done in {wall_s/60:.1f} min -> {run_dir.name} ({len(files)} files)", flush=True) + return { + "wall_s": wall_s, + "run_dir": f"out/runs/{run_dir.name}", # volume-relative, for `modal volume get` + "log": log_rel, # volume-relative + "files": files, + "per_mode_deploy": pmd, + } + + +@app.function(gpu=GPU, volumes={CACHE: cache}, timeout=TIMEOUT, secrets=SECRETS) +def train(argv: str) -> dict: + """Run one `vgrout.train` invocation. `argv` is the CLI string after + `python -m vgrout.train`, e.g. 
"fast --intervention=route2 --seed=43 --steps=60".""" + return _run_train(shlex.split(argv)) + + +@app.function(gpu=GPU, volumes={CACHE: cache}, timeout=TIMEOUT, secrets=SECRETS) +def warm() -> dict: + """Download Qwen3-4B into the Volume HF cache and build the svd_cache once, + by running a 1-step vanilla job. Cheap relative to the real sweep, and every + later container reuses both caches. Vanilla needs no pairset/vhack inputs.""" + out = _run_train(shlex.split("fast --intervention=none --steps=1 --eval-n-prompts=2 --out-tag=_warm")) + cache.commit() + return out + + +@app.function(gpu=GPU, volumes={CACHE: cache}, timeout=TIMEOUT, secrets=SECRETS) +def smoke() -> dict: + """4-step real-model route2 sanity (the user's smoke gate before fan-out). + Needs the FastConfig default inputs on the Volume: out/pairsets/prog_wide.json + + out/pools/substrate (upload via modal/upload_inputs.py first).""" + return _run_train(shlex.split( + "fast --intervention=route2 --seed=43 --steps=4 --eval-ablate-every=2 " + "--eval-n-prompts=2 --out-tag=_modal_smoke" + )) + + +@app.local_entrypoint() +def main(argv: str = "", action: str = "train"): + """`modal run modal/app.py --action warm` + `modal run modal/app.py --action smoke` + `modal run modal/app.py --argv "fast --intervention=route2 --seed=43 --steps=60 ..."`""" + if action == "warm": + print(warm.remote()) + elif action == "smoke": + print(smoke.remote()) + else: + assert argv, "pass --argv 'fast --intervention=... ...'" + print(train.remote(argv)) diff --git a/modal/fetch.py b/modal/fetch.py new file mode 100644 index 0000000..4513a0c --- /dev/null +++ b/modal/fetch.py @@ -0,0 +1,42 @@ +"""Pull run artifacts + logs off the Modal Volume to the local box. + +The launcher (launch.py) already pulls each job's run dir + log as it finishes. +Use this for ad-hoc runs (warm/smoke/manual `--argv`) or to re-sync everything. 
+
+    python modal/fetch.py             # all of out/runs + logs
+    python modal/fetch.py <stem>      # one run: out/runs/<stem>/ + logs/<stem>.log
+"""
+from __future__ import annotations
+
+import subprocess
+import sys
+from pathlib import Path
+
+VOL = "vgrout-cache"
+REPO = Path(__file__).resolve().parent.parent
+
+
+def get_dir(remote: str, local_parent: Path) -> bool:
+    """modal recreates the remote leaf dir UNDER the target, so pass the parent. True if the CLI exited 0."""
+    local_parent.mkdir(parents=True, exist_ok=True)
+    return subprocess.run(["modal", "volume", "get", "--force", VOL, remote, str(local_parent)], check=False).returncode == 0
+
+
+def get_file(remote: str, local: Path) -> bool:  # single file -> exact local path; True if the CLI exited 0
+    local.parent.mkdir(parents=True, exist_ok=True)
+    return subprocess.run(["modal", "volume", "get", "--force", VOL, remote, str(local)], check=False).returncode == 0
+
+
+def main():  # still best-effort (every pull is attempted), but a failed pull no longer prints "[done]"
+    if len(sys.argv) > 1:
+        stem = sys.argv[1]
+        ok = [get_dir(f"out/runs/{stem}", REPO / "out" / "runs"), get_file(f"logs/{stem}.log", REPO / "logs" / f"{stem}.log")]
+    else:
+        ok = [get_dir("out/runs", REPO / "out"), get_dir("logs", REPO)]
+    if not all(ok):
+        sys.exit(f"[fail] `modal volume get` failed for at least one path on {VOL}; see its output above")
+    print(f"[done] pulled to {REPO}/out/runs and {REPO}/logs")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modal/launch.py b/modal/launch.py
new file mode 100644
index 0000000..0395147
--- /dev/null
+++ b/modal/launch.py
@@ -0,0 +1,82 @@
+"""Fan out the 2026-06-06 manifest's Running/Queued jobs (124-135) as parallel
+Modal containers. argv copied verbatim from docs/spec/20260606_job_manifest.md
+(the `vgrout.train` entries), so each Modal run == the pueue run it replaces.
+
+Run only AFTER `modal run modal/app.py --action smoke` is verified clean.
+
+    modal run modal/launch.py                  # all 12
+    modal run modal/launch.py --only 127,134   # a subset (verify-one path)
+
+Each container writes out/runs/<ts>_<tag>/per_mode_deploy.json to the Volume;
+this entrypoint pulls each run dir + log locally and writes modal/results/_summary.json.
+""" +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent)) +from app import app, train # noqa: E402 (same dir; registers the functions) + +VOL = "vgrout-cache" + + +def _pull_dir(remote: str, local_parent: Path): + """modal recreates the remote leaf dir UNDER the target, so pass the parent.""" + local_parent.mkdir(parents=True, exist_ok=True) + subprocess.run(["modal", "volume", "get", "--force", VOL, remote, str(local_parent)], check=False) + + +def _pull_file(remote: str, local: Path): + local.parent.mkdir(parents=True, exist_ok=True) + subprocess.run(["modal", "volume", "get", "--force", VOL, remote, str(local)], check=False) + +# job id -> argv after `python -m vgrout.train`. Verbatim from the manifest. +JOBS: dict[int, str] = { + 124: "fast --intervention=route2 --seed=41 --teacher-off-step=40 --steps=200 --eval-ablate-every=20 --out-tag=_route2_toff40_s41", + 125: "fast --intervention=route --seed=41 --v-hack-path=out/vhack/v_hack_pairset_prog_wide_randomV.safetensors --vhack-refresh-every=0 --eval-ablate-every=5 --steps=60 --out-tag=_route_randomV_s41", + 126: "fast --intervention=route2 --seed=41 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --v-hack-path=out/vhack/v_hack_a5_runtests.safetensors --steps=200 --eval-ablate-every=10 --eval-n-prompts=24 --gate-anchor-teacher-only --out-tag=_a5_route2_teacheronly_s41", + 127: "fast --intervention=erase --seed=41 --eval-ablate-every=5 --out-tag=_erase_realv_s41", + 128: "fast --intervention=erase --seed=41 --vhack-pairs-path=out/pairsets/null_city.json --eval-ablate-every=5 --out-tag=_erase_placebo_nullcity_s41", + 129: "fast --intervention=none --seed=41 --beta=1e-5 --adam-beta1=0.9 --adam-beta2=0.99 --steps=200 --eval-ablate-every=20 --out-tag=_none200_kl5_s41", + 130: "fast --intervention=route2 --seed=41 --beta=1e-5 --adam-beta1=0.9 --adam-beta2=0.99 --steps=200 --eval-ablate-every=20 
--out-tag=_route2200_kl5_s41", + 131: "fast --intervention=none --seed=42 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --steps=200 --eval-ablate-every=10 --eval-n-prompts=24 --out-tag=_a5_vanilla_tmrt_s42", + 132: "fast --intervention=none --seed=43 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --steps=200 --eval-ablate-every=10 --eval-n-prompts=24 --out-tag=_a5_vanilla_tmrt_s43", + 133: "fast --intervention=route2 --seed=42 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --v-hack-path=out/vhack/v_hack_a5_runtests.safetensors --steps=200 --eval-ablate-every=10 --eval-n-prompts=24 --gate-anchor-teacher-only --out-tag=_a5_route2_teacheronly_s42", + 134: "fast --intervention=route2 --seed=43 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --v-hack-path=out/vhack/v_hack_a5_runtests.safetensors --steps=200 --eval-ablate-every=10 --eval-n-prompts=24 --gate-anchor-teacher-only --out-tag=_a5_route2_teacheronly_s43", + 135: "fast --intervention=route2 --seed=41 --teacher-pool-dir=out/pools/substrate --teacher-modes run_tests --route2-random-v-seed=0 --steps=200 --eval-ablate-every=20 --eval-n-prompts=24 --gate-anchor-teacher-only --out-tag=_a5_haar_d0_teacheronly_s41", +} + + +@app.local_entrypoint() +def main(only: str = ""): + ids = [int(x) for x in only.split(",")] if only else sorted(JOBS) + print(f"[launch] spawning {len(ids)} jobs: {ids}") + + # spawn = non-blocking; all run concurrently (subject to your Modal limits). + handles = {jid: train.spawn(JOBS[jid]) for jid in ids} + + # Mirror the Volume layout locally so downloaded runs sit where train.py would + # have written them (out/runs//, logs/.log). + repo = Path(__file__).parent.parent + results = {} + for jid, h in handles.items(): + try: + res = h.get() # blocks until this container finishes + results[jid] = {"ok": True, **res} + # Pull the FULL run dir (ckpts, rollouts, per_mode_deploy.json) + the log. 
+ _pull_dir(res["run_dir"], repo / "out" / "runs") # recreates / under out/runs + _pull_file(res["log"], repo / res["log"]) + print(f"[ok] job {jid}: {res['wall_s']/60:.1f} min -> {res['run_dir']} ({len(res['files'])} files)") + except Exception as e: + results[jid] = {"ok": False, "error": repr(e)} + print(f"[FAIL] job {jid}: {e!r}") + + out_dir = Path(__file__).parent / "results" + out_dir.mkdir(exist_ok=True) + + (out_dir / "_summary.json").write_text(json.dumps(results, indent=2, default=str)) + n_ok = sum(r["ok"] for r in results.values()) + print(f"[launch] {n_ok}/{len(ids)} ok. artifacts in {out_dir}") diff --git a/modal/upload_inputs.py b/modal/upload_inputs.py new file mode 100644 index 0000000..827882f --- /dev/null +++ b/modal/upload_inputs.py @@ -0,0 +1,54 @@ +"""Push the gitignored run INPUTS to the Modal Volume. + +Run this from a box that actually has the artifacts (the 96GB GPU box). The +queued jobs read these from out/ at train time; on Modal that out/ is the Volume. + +What gets uploaded (all small -- KB to a few hundred MB): + out/pairsets/ hand-authored persona contrastive pairs (prog_wide.json, + null_city.json, heldout_known_runtests.json, ...) + out/vhack/ extracted hack-direction bases (v_hack_a5_runtests.safetensors, + v_hack_pairset_prog_wide_randomV.safetensors, ...) + out/pools/ teacher rollout pools (substrate/, teacher_pool/) + +NOT uploaded here (the Volume builds these itself, see app.py::warm): + the Qwen3-4B weights -> downloaded from HF into /cache/hf on first run + svd_cache/ -> computed once on Modal and cached + +Usage (on the GPU box, after `pip install modal` + `modal token new`): + python modal/upload_inputs.py +""" +from __future__ import annotations + +from pathlib import Path + +import modal + +# Volume paths are relative to the volume ROOT (which mounts at /cache in the +# container), so we upload to "out/..." NOT "/cache/out/...". 
+DIRS = [
+    "out/pairsets",
+    "out/vhack",
+    "out/pools",
+    "external/rl-rewardhacking/results/data",  # the LeetCode problems (train/test/holdout jsonl)
+]
+
+
+def main():  # upload every DIRS entry that exists locally, keeping its repo-relative path on the Volume
+    vol = modal.Volume.from_name("vgrout-cache", create_if_missing=True)
+    repo = Path(__file__).resolve().parent.parent
+    present = [(repo / d) for d in DIRS if (repo / d).exists()]
+    missing = [d for d in DIRS if not (repo / d).exists()]
+    if missing:
+        print(f"[warn] absent locally, skipping: {missing}")
+    if not present:  # explicit exit, not `assert`: asserts are stripped under `python -O`
+        raise SystemExit(f"none of {DIRS} exist under {repo} -- run from the box that has the artifacts")
+    with vol.batch_upload(force=True) as batch:
+        for local in present:
+            remote = str(local.relative_to(repo))  # e.g. "out/pools"
+            print(f"[upload] {local} -> {remote}")
+            batch.put_directory(str(local), remote)
+    print("[done] inputs on Volume. Verify: modal volume ls vgrout-cache out")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 5aca5d1..d416643 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ dependencies = [
     # release with Blackwell sm_120 kernels (consumer RTX PRO 6000). Pinned to
     # mjun0812 prebuilds — see [tool.uv.sources] below.
"flash-attn", + "modal>=1.4.3", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 7ccefff..bbe3007 100644 --- a/uv.lock +++ b/uv.lock @@ -8,7 +8,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-05-24T00:00:00Z" +exclude-newer = "2026-05-23T16:00:00Z" [[package]] name = "accelerate" @@ -709,6 +709,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" }, ] +[[package]] +name = "grpclib" +version = "0.4.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h2" }, + { name = "multidict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/28/5a2c299ec82a876a252c5919aa895a6f1d1d35c96417c5ce4a4660dc3a80/grpclib-0.4.9.tar.gz", hash = "sha256:cc589c330fa81004c6400a52a566407574498cb5b055fa927013361e21466c46", size = 84798, upload-time = "2025-12-14T22:23:14.349Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/90/b0cbbd9efcc82816c58f31a34963071aa19fb792a212a5d9caf8e0fc3097/grpclib-0.4.9-py3-none-any.whl", hash = "sha256:7762ec1c8ed94dfad597475152dd35cbd11aecaaca2f243e29702435ca24cf0e", size = 77063, upload-time = "2025-12-14T22:23:13.224Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -718,6 +731,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hf-xet" version = "1.5.0" @@ -742,6 +768,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/94/3b66b148778ee100dcfd69c2ca22b57b41b44d3063ceec934f209e9184ce/hf_xet-1.5.0-cp37-abi3-win_arm64.whl", hash = "sha256:b6c9df403040248c76d808d3e047d64db2d923bae593eb244c41e425cf6cd7be", size = 3806916, upload-time = "2026-05-06T06:18:21.7Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = "2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -805,6 +840,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/79/621a7dbb80c70974f73a597275351ebe03ce5bc65cb5f8f4acb5859252bc/huggingface_hub-1.16.1-py3-none-any.whl", hash = "sha256:64340de934b9ce37857ef85a82de72f5629e8a270f9119eabb12bf495eb53c22", size = 668176, upload-time = 
"2026-05-21T18:39:58.596Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.16" @@ -1084,6 +1128,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/82/11fd62a8d7a3e96e5c43220b17de0151e3f10101f8bb3b865f5bd9cdd074/mlx_metal-0.31.2-py3-none-macosx_26_0_arm64.whl", hash = "sha256:84ffb60ee503f03eb684f5fb168d5cff31e2a16b7f27c1731eaf7662bd6e9b46", size = 55792151, upload-time = "2026-04-22T03:14:22.059Z" }, ] +[[package]] +name = "modal" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "cbor2" }, + { name = "certifi" }, + { name = "click" }, + { name = "grpclib" }, + { name = "protobuf" }, + { name = "rich" }, + { name = "synchronicity" }, + { name = "toml" }, + { name = "types-certifi" }, + { name = "types-toml" }, + { name = "typing-extensions" }, + { name = "watchfiles" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/7d/4126d0fe879ef3e86002ca821a34cb68a2588ea2e8ccb2bfe421d0f42ffe/modal-1.4.3.tar.gz", hash = "sha256:35b2fc840f759b512e12527afb538e1ea4cc232b84cfbfcef3f5d96d5a66abaa", size = 720488, upload-time = "2026-05-18T22:34:45.842Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/56/54/400262056c144ceee5edab40efa2541ae8928ae5f244fd9025f3ad26c909/modal-1.4.3-py3-none-any.whl", hash = "sha256:802917181f576458a0cb833322157dab09c4f367326426c5a732661a0c519577", size = 826232, upload-time = "2026-05-18T22:34:43.335Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2372,6 +2440,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "synchronicity" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/5e/50ea27817003665c7cc4f5bdad309f13d6329037f657848ee87fe06c3740/synchronicity-0.12.2.tar.gz", hash = "sha256:6fd605a5035d1ec74ce48fffaca80ea00345c84ca34223914e2436fb4f162ff9", size = 60018, upload-time = "2026-04-06T15:06:15.447Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/44/4f6ba4e2c171847e6f9a460213b196bbf26edea43d0e66889c7ccc55d368/synchronicity-0.12.2-py3-none-any.whl", hash = "sha256:9dbaca81fb7f2b57c6dea326e514e1c80e9ccfd9c9618515e84fa6091026273b", size = 41312, upload-time = "2026-04-06T15:06:14.459Z" }, +] + [[package]] name = "tabulate" version = "0.10.0" @@ -2433,6 +2513,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, +] + [[package]] name = "torch" version = "2.8.0" @@ -2577,6 +2666,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/f9/2b3ff4e56e5fa7debfaf9eb135d0da96f3e9a1d5b27222223c7296336e5f/typer-0.25.1-py3-none-any.whl", hash = "sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89", size = 58409, upload-time = "2026-04-30T19:32:18.271Z" }, ] +[[package]] +name = "types-certifi" +version = "2021.10.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/68/943c3aeaf14624712a0357c4a67814dba5cea36d194f5c764dad7959a00c/types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f", size = 2095, upload-time = "2022-06-09T15:19:05.244Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/63/2463d89481e811f007b0e1cd0a91e52e141b47f9de724d20db7b861dcfec/types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a", size = 2136, upload-time = "2022-06-09T15:19:03.127Z" }, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20260518" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/11/6ece999e91f2ccb848ab4420f3f4816e78ac0541f739e6864affdaaa5737/types_toml-0.10.8.20260518.tar.gz", hash = 
"sha256:80e10facd24fdeda9d5c672187d72be3ac284843788d67f5aae59e3e016db6fe", size = 9419, upload-time = "2026-05-18T06:02:16.719Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/25/489751806bf5c95e4007f8e17409199c54d31e49ffbea07c5729b1286c8e/types_toml-0.10.8.20260518-py3-none-any.whl", hash = "sha256:0e564ab05f6fde62a315b3b5a9b6624fda569399795d30a37e64705a70459303", size = 9669, upload-time = "2026-05-18T06:02:15.86Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2682,6 +2789,7 @@ dependencies = [ { name = "huggingface-hub" }, { name = "jaxtyping" }, { name = "loguru" }, + { name = "modal" }, { name = "numpy" }, { name = "peft" }, { name = "polars" }, @@ -2709,6 +2817,7 @@ requires-dist = [ { name = "huggingface-hub", specifier = ">=0.24" }, { name = "jaxtyping", specifier = ">=0.2" }, { name = "loguru", specifier = ">=0.7" }, + { name = "modal", specifier = ">=1.4.3" }, { name = "numpy", specifier = "<2.0" }, { name = "peft", specifier = ">=0.13" }, { name = "polars", specifier = ">=1.0" },