Files
evil_MoE/modal/upload_inputs.py
T
2026-06-10 11:58:53 +00:00

57 lines
2.2 KiB
Python

"""Push the gitignored run INPUTS to the Modal Volume.
Run this from a box that actually has the artifacts (the 96GB GPU box). The
queued jobs read these from out/ at train time; on Modal that out/ is the Volume.
What gets uploaded (all small -- KB to a few hundred MB):
out/pairsets/ hand-authored persona contrastive pairs (prog_wide.json,
heldout_known_runtests.json, ...)
data/pairs/ hand-authored Markdown pairsets
out/vhack/ extracted hack-direction bases (v_hack_a5_runtests.safetensors,
v_hack_pairset_prog_wide_randomV.safetensors, ...)
out/pools/ teacher rollout pools (substrate/, teacher_pool/)
external/rl-rewardhacking/results/data/ the LeetCode problems (train/test/holdout jsonl)
NOT uploaded here (the Volume builds these itself, see app.py::warm):
the Qwen3-4B weights -> downloaded from HF into /cache/hf on first run
svd_cache/ -> computed once on Modal and cached
Usage (on the GPU box, after `pip install modal` + `modal token new`):
python modal/upload_inputs.py
"""
from __future__ import annotations
from pathlib import Path
import modal
# Input directories to push, written relative to the repo root.
# main() reuses each entry verbatim as the remote path, so the Volume mirrors
# the repo layout. Volume paths are relative to the volume ROOT (which mounts
# at /cache in the container), so we upload to "out/..." NOT "/cache/out/...".
# Entries that do not exist locally are skipped with a warning by main().
DIRS = [
    "out/pairsets",
    "data/pairs",
    "out/vhack",
    "out/pools",
    "external/rl-rewardhacking/results/data", # the LeetCode problems (train/test/holdout jsonl)
]
def main():
    """Upload every locally present directory listed in DIRS to the Modal Volume.

    The repo root is taken to be the parent of the directory containing this
    script. DIRS is split into entries that exist under that root and entries
    that do not; the missing ones are reported and skipped, the present ones
    are pushed to the "vgrout-cache" Volume in a single batch, each at the
    same repo-relative path it has locally.

    Raises:
        FileNotFoundError: if none of the DIRS entries exist under the repo
            root (typically the script was run on a machine that does not
            hold the artifacts).
    """
    repo = Path(__file__).resolve().parent.parent

    # Partition DIRS in one pass so each path is checked exactly once and the
    # present/missing lists cannot disagree with each other.
    present = []
    missing = []
    for d in DIRS:
        local = repo / d
        if local.exists():
            present.append(local)
        else:
            missing.append(d)

    if missing:
        print(f"[warn] absent locally, skipping: {missing}")
    if not present:
        # Explicit raise rather than `assert`: asserts are stripped under
        # `python -O`, which would let an empty upload report "[done]".
        raise FileNotFoundError(
            f"none of {DIRS} exist under {repo} -- run from the box that has the artifacts"
        )

    # Only reach for the Volume once we know there is something to upload.
    vol = modal.Volume.from_name("vgrout-cache", create_if_missing=True)
    # force=True: per Modal's batch_upload API, permits overwriting files that
    # are already on the Volume, so re-running the script refreshes the inputs.
    with vol.batch_upload(force=True) as batch:
        for local in present:
            # as_posix() keeps the remote path forward-slashed on every OS;
            # str() would emit backslashes on Windows.
            remote = local.relative_to(repo).as_posix()  # e.g. "out/pools"
            print(f"[upload] {local} -> {remote}")
            batch.put_directory(str(local), remote)
    print("[done] inputs on Volume. Verify: modal volume ls vgrout-cache out")
# Run the upload only when this file is executed as a script
# (`python modal/upload_inputs.py`), not when it is imported.
if __name__ == "__main__":
    main()