From 2c56196deaaa5fbf82067b606be8f0631c3f83cf Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Mon, 15 Jun 2026 18:35:54 +0800 Subject: [PATCH] justfile/run_id: r override for low-rank antipasto sweeps bench-variant gains an r_override arg (alpha tracks r for the antipasto family); run_id appends an __r{r} suffix (e.g. __r64) when an antipasto-family run uses r!=256, so the low-rank corda-vs-antipasto sweep does not overwrite the r=256 results. Co-Authored-By: Claudypoo --- justfile | 4 +++- scripts/metamath_gsm8k_benchmark.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/justfile b/justfile index a3d50a5..f279e3c 100644 --- a/justfile +++ b/justfile @@ -75,7 +75,7 @@ metamath-queue variant="lora" steps="5000" model="Qwen/Qwen3-0.6B-Base": # Run a single MetaMathQA->GSM8K benchmark for a given variant. # Per-variant lr / target-name defaults are baked in here. -bench-variant model variant steps="5000" block="8": +bench-variant model variant steps="5000" block="8" r_override="": #!/usr/bin/env bash set -euo pipefail lr=1e-4 @@ -98,6 +98,8 @@ bench-variant model variant steps="5000" block="8": # and destabilizes at that lr (block=128 got 45.7% vs block=8's 60.5%). Drop to # LoRA's 1e-4 once the block dominates the param count. if [ "{{variant}}" = "antipasto_arrow" ] && [ "{{block}}" -gt 8 ]; then lr=1e-4; fi + # r override (e.g. low-rank corda sweep); alpha tracks r for the antipasto family. 
+ if [ -n "{{r_override}}" ]; then r="{{r_override}}"; alpha="{{r_override}}"; fi exec uv run --extra benchmark python scripts/metamath_gsm8k_benchmark.py \ --model '{{model}}' \ --variant '{{variant}}' \ diff --git a/scripts/metamath_gsm8k_benchmark.py b/scripts/metamath_gsm8k_benchmark.py index 97271a5..5942350 100644 --- a/scripts/metamath_gsm8k_benchmark.py +++ b/scripts/metamath_gsm8k_benchmark.py @@ -536,6 +536,9 @@ def run(args: BenchmarkConfig) -> dict[str, Any]: # arrow's capacity is set by block, not r, so keep block-sweep runs from colliding. if args.variant == "antipasto_arrow" and args.antipasto_block != 8: run_id += f"__b{args.antipasto_block}" + # antipasto family defaults to r=256; low-rank sweeps get their own dirs. + if args.variant.startswith("antipasto") and args.r != 256: + run_id += f"__r{args.r}" out_dir = args.output_dir / run_id out_dir.mkdir(parents=True, exist_ok=True)