Files
lora-lite/tests/test_metamath_gsm8k_benchmark.py
wassname 2a50373311 test: put scripts/ on sys.path so benchmark's sibling _cost import resolves in CI
CI collection failed with ModuleNotFoundError: No module named '_cost' because
exec_module loads the script without its dir on sys.path.

Co-Authored-By: Claudypoo <288921227+claudypoo@users.noreply.github.com>
2026-06-19 08:47:41 +08:00

39 lines
1.3 KiB
Python

import importlib.util
import sys
from pathlib import Path
# Path to the benchmark script under test: the scripts/ directory that sits
# next to this test file's own directory.
SCRIPT_PATH = Path(__file__).parents[1] / "scripts" / "metamath_gsm8k_benchmark.py"
# the script uses sibling imports (`from _cost import ...`), so scripts/ must be importable
sys.path.insert(0, str(SCRIPT_PATH.parent))
SPEC = importlib.util.spec_from_file_location("metamath_gsm8k_benchmark", SCRIPT_PATH)
# spec_from_file_location may return None, and a spec may lack a loader.
# Check both *before* the spec is used, and raise explicitly so the guard
# still runs under `python -O` (where assert statements are removed).
if SPEC is None or SPEC.loader is None:
    raise ImportError(f"cannot build an import spec for {SCRIPT_PATH}")
benchmark = importlib.util.module_from_spec(SPEC)
# Register the module under SPEC.name before executing it, following the
# importlib recipe for importing a source file directly.
sys.modules[SPEC.name] = benchmark
SPEC.loader.exec_module(benchmark)
# Re-export the two helpers exercised by the tests in this file.
extract_answer = benchmark.extract_answer
score_predictions = benchmark.score_predictions
def test_extract_answer_handles_gsm8k_numeric_forms():
    """Check `extract_answer` on three differently formatted answers.

    The inputs are a `####`-marked answer, a number with a thousands
    separator followed by a period, and a negative number after `=`.
    Each must come back as the bare numeric string.
    """
    text_and_expected = (
        ("#### 42", "42"),
        ("The answer is 1,234.", "1234"),
        ("So x = -17", "-17"),
    )
    # Same checks, same order as listing them out one assert per line.
    for text, expected in text_and_expected:
        assert extract_answer(text) == expected
def test_score_predictions_uses_continuation_answers_only():
    """Check the summary `score_predictions` returns for three samples.

    Only the first sample is expected to count as correct. The second has no
    number to extract. The third quotes the reference value (`#### 5`) but
    ends on a different number, and the expected tally treats it as wrong.
    """
    # (model output, reference solution) pairs, kept side by side.
    cases = [
        ("We compute it. The answer is 42.", "reasoning\n#### 42"),
        ("No final number here", "reasoning\n#### 9"),
        ("Prompt said #### 5. But the continuation answer is 6.", "reasoning\n#### 5"),
    ]
    model_outputs = [output for output, _ in cases]
    gold_solutions = [gold for _, gold in cases]

    result = score_predictions(model_outputs, gold_solutions)

    # Checked in this order: correct, total, accuracy.
    expected_fields = (
        ("correct", 1),
        ("total", 3),
        ("accuracy", 1 / 3),
    )
    for key, expected in expected_fields:
        assert result[key] == expected