mirror of
https://github.com/wassname/evil_MoE.git
synced 2026-06-27 18:04:59 +08:00
96 lines
3.9 KiB
Makefile
96 lines
3.9 KiB
Makefile
# Linewise recipes are executed with `bash -cu` (-u: unset variables are errors).
set shell := ["bash", "-cu"]

# Seeds: headline arms run all three of these; ablations run a single seed.
SEEDS_3 := "41 43 44"

# Main model for H4 is Qwen3.5-2B. Per spec.md, switch to Qwen/Qwen3-4B if H4 is
# falsified (vanilla hack rate < 30%).
MODEL := "Qwen/Qwen3.5-2B"

# Compute-fit overrides for a single 96GB GPU
# (see docs/grpo_hyperparams.md, section "Our deviations").
NUM_GEN := "8"
BATCH := "16"

# Smoke-test model only: qwen3 architecture, ~6M params.
TINY_MODEL := "llamafactory/tiny-random-qwen3"

# Command prefix for every recipe that invokes the projected_grpo.run module.
BASE := "uv run python -m projected_grpo.run"
|
|
|
|
# List the recipes defined in this justfile.
default:
    @just --list
|
|
|
|
# Exercises model load, v_hack extraction, SVD denoise, gradient projection and
# one fake GRPO step, covering both the vanilla and projected pathways in a
# single invocation. Any extra ARGS are appended to the command line.
# Fast dev run: real pipeline end-to-end on the tiny-random model, beartype on, ~1-2 min.
fast-dev-run *ARGS:
    BEARTYPE=1 {{BASE}} --fast-dev-run --model={{TINY_MODEL}} {{ARGS}}
|
|
|
|
# Smoke-test only the projected-gradient arm, using the tiny-random model.
smoke-projected:
    BEARTYPE=1 {{BASE}} --fast-dev-run --arm=projected --model={{TINY_MODEL}}
|
|
|
|
# Smoke-test only the vanilla GRPO arm (no projection), using the tiny-random model.
smoke-vanilla:
    BEARTYPE=1 {{BASE}} --fast-dev-run --arm=vanilla --model={{TINY_MODEL}}
|
|
|
|
# Fast-forward the external rl-rewardhacking checkout (Nanda's verl wrapper).
# `git -C <dir>` runs the pull inside that directory; --ff-only refuses to
# create a merge commit if the local branch has diverged.
sync-external:
    git -C external/rl-rewardhacking pull --ff-only
|
|
|
|
# H: Qwen3.5-2B is the real-run model per spec.md; sub for Qwen3-4B (Nanda) to fit 96GB.
# The repo id is taken from MODEL rather than hard-coded, so the snapshot that
# gets cached is always the model the real runs load (the previous literal,
# 'Qwen/Qwen2.5-1.5B', warmed the cache for a model no recipe uses).
# allow_patterns restricts the download to config, tokenizer and safetensors files.
# Download the real-run model (MODEL) to the HF cache before real runs.
download-model:
    uv run python -c "from huggingface_hub import snapshot_download; \
    snapshot_download('{{ MODEL }}', allow_patterns=['*.json','*.txt','tokenizer*','*.safetensors'])"
|
|
|
|
# Run priorities: the vanilla baseline is queued first (its numbers are what
# the other arms get compared against). Comment out arms that are done.
# Queue all sweep arms via pueue.
queue:
    #!/usr/bin/env bash
    # Shebang recipes run as a standalone script and do not go through
    # `set shell`, so strict mode is enabled explicitly: abort at the first arm
    # that fails to queue instead of carrying on and reporting success.
    set -euxo pipefail
    just queue-vanilla
    just queue-projected-m16
    # just queue-projected-no-svd      # H2 ablation
    # just queue-projected-no-magnorm  # design ablation
    # just queue-rebound               # H3 baseline
    # just queue-projected-m8          # H2 sweep
    # just queue-projected-m32         # H2 sweep
|
|
|
|
# H: hack rate >30% at step 200 per spec H4.
# Real run goes through Ariahw's verl pipeline (NOT our smoke run.py): each task
# is queued with external/rl-rewardhacking as its working directory.
# Vanilla GRPO baseline, one pueue task per seed in SEEDS_3.
queue-vanilla:
    #!/usr/bin/env bash
    # Strict mode: without -e a failed `pueue add` for an early seed would be
    # masked by a later successful one and the recipe would still exit 0.
    set -euxo pipefail
    # SEEDS_3 is intentionally unquoted so the shell splits it into one word per seed.
    for seed in {{ SEEDS_3 }}; do
        pueue add -w "$PWD/external/rl-rewardhacking" -o 5 \
            -l "why: H4 sanity, does {{ MODEL }} reward-hack at all; resolve: if <30% hack rate at step 200, swap MODEL to Qwen/Qwen3-4B + reduce NUM_GEN to 4" \
            -- uv run python scripts/run_rl_training.py no_intervention \
            --model_id={{ MODEL }} --seed="$seed" \
            --num_generations={{ NUM_GEN }} --per_device_batch_size={{ BATCH }}
    done
|
|
|
|
# H1 main result.
# TODO: integrate project_grad_per_row into verl's GRPO trainer. Currently this
# recipe still calls our smoke run.py end-to-end; it is a placeholder until the
# verl-wrapped projection is wired (next task on GPU box).
# Projected gradient, m=16, one pueue task per seed in SEEDS_3.
queue-projected-m16:
    #!/usr/bin/env bash
    # Strict mode: without -e a failed `pueue add` for an early seed would be
    # masked by a later successful one and the recipe would still exit 0.
    set -euxo pipefail
    # SEEDS_3 is intentionally unquoted so the shell splits it into one word per seed.
    for seed in {{ SEEDS_3 }}; do
        pueue add -w "$PWD" -o 4 \
            -l "why: H1 main, gradient proj reduces hack rate >=30pp at matched pass; resolve: publish if H1 holds; BLOCKED: needs verl integration" \
            -- {{ BASE }} --arm=projected --m=16 --seed="$seed" --model={{ MODEL }} --steps=200
    done
|
|
|
|
# H: adding v_hack at inference should shift completions toward hack-flavored text.
# Extra ARGS are appended to the command line.
# Diagnostic: v_hack steering check (CAA-style) on the base model.
vhack-check *ARGS:
    {{BASE}} --vhack-check --model={{MODEL}} {{ARGS}}
|
|
|
|
# Print the prototype results table (docs/table_proto.md) to stdout.
table-proto:
    @cat docs/table_proto.md
|
|
|
|
# Show recent output from the pueue task logs.
log:
    pueue log -l 40
|
|
|
|
# Prints a reminder, then opens the journal in $EDITOR (falls back to vi when
# EDITOR is unset or empty).
# Add a new dated entry at the top of the research journal (interactive).
journal:
    @echo "Edit docs/RESEARCH_JOURNAL.md and prepend a dated entry."
    @${EDITOR:-vi} docs/RESEARCH_JOURNAL.md
|