From c42bea207a2d8f871b6eab72ab96ea403cdb57ff Mon Sep 17 00:00:00 2001
From: wassname <github@wassname>
Date: Fri, 15 May 2026 06:25:44 +0000
Subject: [PATCH] minicache 0.2.0: simplify to @cached(exclude=...) as primary
 API

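Makes `kind` (previously a required positional) and `cachedir` (previously
a required keyword) optional on the decorator, and drops the `state_fn`
concept entirely. New defaults: kind from fn.__name__, cachedir from
DEFAULT_CACHEDIR = Path("cache").

Breaking changes: `kind` is now keyword-only, so `@cached("eval", ...)`
must become `@cached(kind="eval", ...)`. The hashed payload is now just the
included args (no kind/state fields), so entries written by 0.1.0 will not
be hit.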

Primary usage is now just:

    @cached(exclude=["model", "tok"])
    def run_eval(model, tok, *, model_id, name, batch_size):
        ...

Explicit-key form is unchanged, except that `cachedir` now defaults to
DEFAULT_CACHEDIR:

    cache_call("eval", "qwen-27b|nf4|classic|bs=16", lambda: ...)

Version bumped to 0.2.0.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/minicache/__init__.py | 71 ++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/src/minicache/__init__.py b/src/minicache/__init__.py
index 181df8b..3e0374c 100644
--- a/src/minicache/__init__.py
+++ b/src/minicache/__init__.py
@@ -1,38 +1,38 @@
 """minicache — tiny disk cache for ML / research code.
 
 Wraps function calls and stores returns on disk (gzip + cloudpickle). Solves
-the four pain points that stdlib `functools.lru_cache + pickle` and existing
-function-cache libraries (anycache, cachier) hit on ML code:
+four pain points that `functools.lru_cache + pickle` and existing function-cache
+libs (anycache, cachier) hit on ML code:
 
 - **Loaded models can't be hashed** → arg blacklist (`exclude=["model", "tok"]`).
   Excluded args pass through to the function but never enter the cache key.
 - **Tensors / pandas / closures break stdlib pickle** → cloudpickle backend.
 - **Pickle files grow large** → gzip on disk (~3× smaller, free).
-- **"Function source changed → invalidate" causes false invalidations on
-  reformat** → caller bumps an explicit `state` string when behavior actually
-  changes. No AST hashing magic.
+- **No source-AST hashing** → false invalidation on reformat is the worst kind
+  of bug. Caller passes a `state` kwarg (or anything else) when behavior
+  changes. No magic.
 
-## Quick use
+## Usage
 
     from minicache import cached, cache_call
 
-    # 1. Decorator: hashes (state, included args). Excludes drop out of key.
-    @cached("eval", cachedir="out/cache",
-            state_fn=lambda *, model_id, **_: f"{model_id}|nf4|r00+r02",
-            exclude=["model", "tok"])
+    # Decorator. Default cachedir = ./cache, default kind = fn.__name__.
+    @cached(exclude=["model", "tok"])
     def run_eval(model, tok, *, model_id, name, batch_size):
-        return tinymfv_evaluate(model, tok, name=name, batch_size=batch_size)
+        return expensive_eval(model, tok, name=name, batch_size=batch_size)
 
     report = run_eval(model, tok, model_id="qwen-27b", name="classic", batch_size=16)
+    # second call with same args (any model/tok instance) → cache HIT
 
-    # 2. Explicit key: no introspection, you compose the key
-    key = "qwen-27b|nf4|r00+r02|eval|classic|bs=16"
-    report = cache_call("eval", key, lambda: tinymfv_evaluate(model, tok, ...),
-                        cachedir="out/cache")
+    # Explicit-key form. Caller composes the key (no introspection).
+    # Useful when args alone don't determine the cache identity (e.g. you
+    # also want to pin to disk state walked at call time).
+    report = cache_call("eval", "qwen-27b|nf4|r00+r02|classic|bs=16",
+                        lambda: expensive_eval(model, tok, name="classic"))
 
-See also
+See also:
 - anycache https://github.com/c0fec0de/anycache
-- cachier https://github.com/python-cachier/cachier#working-with-unhashable-arguments
+- cachier https://github.com/python-cachier/cachier
 """
 from __future__ import annotations
 
@@ -46,9 +46,10 @@ from typing import Any, Callable, Iterable
 import cloudpickle
 
 
-__version__ = "0.1.0"
-__all__ = ["cache_call", "cached"]
+__version__ = "0.2.0"
+__all__ = ["cache_call", "cached", "DEFAULT_CACHEDIR"]
 
+DEFAULT_CACHEDIR = Path("cache")
 _EXT = ".pkl.gz"
 
 
@@ -57,7 +58,7 @@ def _hash(payload: str) -> str:
 
 
 def cache_call(kind: str, key: str, fn: Callable[[], Any],
-               cachedir: Path | str) -> Any:
+               cachedir: Path | str = DEFAULT_CACHEDIR) -> Any:
     """Run-or-load. Cache file = `<cachedir>/<kind>/<key>.pkl.gz`.
 
     Hit: gunzip + cloudpickle.load → return.
@@ -76,39 +77,34 @@ def cache_call(kind: str, key: str, fn: Callable[[], Any],
 
 
 def cached(
-    kind: str,
     *,
-    cachedir: Path | str,
-    state_fn: Callable[..., str] | None = None,
     exclude: Iterable[str] = (),
+    cachedir: Path | str = DEFAULT_CACHEDIR,
+    kind: str | None = None,
 ):
-    """Decorator. Cache key = sha256(kind | state_fn(**args) | included_args)
-    where included = signature(fn) \\ exclude.
+    """Decorator. Cache key = sha256(json(included args)).
 
-    `state_fn` lets you inject context that isn't a function arg (e.g. a model
-    fingerprint walked from disk). It receives ALL bound args by name; pull
-    out what you need with **kwargs unpacking.
+    `exclude` drops args from the key — use for unhashable / large /
+    instance-specific values (loaded models, GPU tensors, open files). They
+    still pass through to the function unchanged.
 
-    Args in `exclude` pass through to fn unchanged but never enter the key —
-    use this for unhashable / large / instance-specific things (loaded models,
-    open files, GPU tensors).
+    `kind` is the cache subdir (default = fn.__name__).
     """
     excluded = set(exclude)
 
     def decorator(fn: Callable) -> Callable:
         sig = inspect.signature(fn)
         keep = [n for n in sig.parameters if n not in excluded]
+        sub = kind or fn.__name__
 
         def wrapper(*args, **kwargs):
             bound = sig.bind(*args, **kwargs)
             bound.apply_defaults()
             included = {n: bound.arguments[n] for n in keep
                         if n in bound.arguments}
-            state = state_fn(**bound.arguments) if state_fn else ""
-            payload = json.dumps({"k": kind, "s": state, "a": included},
-                                 sort_keys=True, default=str)
+            payload = json.dumps(included, sort_keys=True, default=str)
             key = _hash(payload)
-            return cache_call(kind, key, lambda: fn(*args, **kwargs), cachedir)
+            return cache_call(sub, key, lambda: fn(*args, **kwargs), cachedir)
 
         wrapper.__wrapped__ = fn
         return wrapper
@@ -116,17 +112,16 @@ def cached(
 
 
 if __name__ == "__main__":
-    # Smoke: hits cache on second call.
     import tempfile
     import time
     with tempfile.TemporaryDirectory() as td:
-        @cached("demo", cachedir=td, exclude=["expensive_obj"])
+        @cached(exclude=["expensive_obj"], cachedir=td)
         def f(x, expensive_obj=None, y=10):
             time.sleep(0.5)
             return x + y
 
         t0 = time.time(); assert f(1, expensive_obj=object()) == 11
-        t1 = time.time(); assert f(1, expensive_obj=object()) == 11  # cache HIT
+        t1 = time.time(); assert f(1, expensive_obj=object()) == 11
         t2 = time.time()
         print(f"miss: {t1-t0:.3f}s, hit: {t2-t1:.4f}s (different obj instance, same key)")