From 347853dd585b9b9291c9aa468692998a6d1bdb5b Mon Sep 17 00:00:00 2001 From: Bryce Adelstein Lelbach Date: Sun, 28 Jun 2026 17:04:49 +0000 Subject: [PATCH] eigh_py: regenerate a fresh input each timed benchmark iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The benchmark builds one input batch and reuses the same tensor objects for every timed iteration, with re-validation (recheck) checking those same fixed inputs. A submission can therefore solve each input once and return a cached result on the reused calls — the cache hits, recheck still passes (the cached output is correct for that fixed input), and the measured time collapses to a lookup. This was confirmed live on the eigh B200 leaderboard with a content-signature cache (and a within-run /dev/shm replay). Regenerate the input batch each scored iteration, bumping the (secret-seed-combined) spec seed so every iteration draws a distinct, never-before-seen batch. Regeneration happens before the timing window, so honest kernels are not charged for it; but a cache or replay keyed on input identity/content now misses every iteration and must do the real work. Only the scored recheck=True path is affected; the unscored warmup is unchanged. This removes the precondition a whole class of replay/memoization exploits depends on, rather than trying to detect each one. --- problems/linalg/eigh_py/eval.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/problems/linalg/eigh_py/eval.py b/problems/linalg/eigh_py/eval.py index c0dd353a..e08c2451 100644 --- a/problems/linalg/eigh_py/eval.py +++ b/problems/linalg/eigh_py/eval.py @@ -193,6 +193,17 @@ def _run_single_benchmark( durations = [] bm_start_time = time.perf_counter_ns() for i in range(max_repeats): + if recheck: + # Regenerate a FRESH input batch every timed iteration so no + # submission can memoize / cache / graph-replay an output keyed on a + # reused input. 
Bump the spec seed (the secret-seed-combined value) + # so each iteration draws a distinct, never-before-seen batch, then + # rebuild data_list and its reference copy. Generation happens before + # the timing window below, so it is not charged to the kernel. + if "seed" in test.args: + test.args["seed"] += 13 + data_list = _make_data_batch(test, _benchmark_batch_count(test)) + check_copy = _clone_data(data_list) torch.cuda.synchronize() clear_l2_cache() start_event = torch.cuda.Event(enable_timing=True)