mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 19:31:11 +08:00
57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
"""Push the gitignored run INPUTS to the Modal Volume.
|
|
|
|
Run this from a box that actually has the artifacts (the 96GB GPU box). The
|
|
queued jobs read these from out/ at train time; on Modal that out/ is the Volume.
|
|
|
|
What gets uploaded (all small -- KB to a few hundred MB):
|
|
out/pairsets/ hand-authored persona contrastive pairs (prog_wide.json,
|
|
heldout_known_runtests.json, ...)
|
|
data/pairs/ hand-authored Markdown pairsets
|
|
out/vhack/ extracted hack-direction bases (v_hack_a5_runtests.safetensors,
|
|
v_hack_pairset_prog_wide_randomV.safetensors, ...)
|
|
out/pools/ teacher rollout pools (substrate/, teacher_pool/)
external/rl-rewardhacking/results/data/ the LeetCode problems (train/test/holdout jsonl)
|
|
|
|
NOT uploaded here (the Volume builds these itself, see app.py::warm):
|
|
the Qwen3-4B weights -> downloaded from HF into /cache/hf on first run
|
|
svd_cache/ -> computed once on Modal and cached
|
|
|
|
Usage (on the GPU box, after `pip install modal` + `modal token new`):
|
|
python modal/upload_inputs.py
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import modal
|
|
|
|
# Repo-relative directories to push to the Volume. Each entry doubles as the
# destination path: Volume paths are relative to the volume ROOT (which mounts
# at /cache in the container), so the target is "out/..." and NOT
# "/cache/out/...".
DIRS = [
    "out/pairsets",
    "data/pairs",
    "out/vhack",
    "out/pools",
    # the LeetCode problems (train/test/holdout jsonl)
    "external/rl-rewardhacking/results/data",
]
|
|
|
|
|
|
def main():
    """Upload every locally present directory in DIRS to the Modal Volume.

    The repo root is taken to be the parent of this script's directory. Each
    DIRS entry that exists under the repo root is uploaded to the
    "vgrout-cache" Volume at the same repo-relative path; entries that are
    absent locally are reported and skipped.

    Raises:
        FileNotFoundError: if none of the DIRS entries exist under the repo
            root (i.e. the script is being run on a box without the artifacts).
    """
    vol = modal.Volume.from_name("vgrout-cache", create_if_missing=True)
    repo = Path(__file__).resolve().parent.parent

    # Partition DIRS in a single pass so each path is stat'ed only once.
    present, missing = [], []
    for rel in DIRS:
        path = repo / rel
        if path.exists():
            present.append(path)
        else:
            missing.append(rel)

    if missing:
        print(f"[warn] absent locally, skipping: {missing}")
    # Explicit raise rather than `assert`: asserts are stripped under
    # `python -O`, which would let an empty upload report success.
    if not present:
        raise FileNotFoundError(
            f"none of {DIRS} exist under {repo} -- run from the box that has the artifacts"
        )

    with vol.batch_upload(force=True) as batch:
        for local in present:
            # Volume paths are POSIX-style; as_posix() keeps the remote path
            # forward-slashed regardless of the local OS separator.
            remote = local.relative_to(repo).as_posix()  # e.g. "out/pools"
            print(f"[upload] {local} -> {remote}")
            batch.put_directory(str(local), remote)
    # Printed only after the batch context has exited without raising.
    print("[done] inputs on Volume. Verify: modal volume ls vgrout-cache out")
|
|
|
|
|
|
# Script entry point: run the upload only when executed directly, not on import.
if __name__ == "__main__":
    main()
|