From 9fe47c4964a7541ecb5cac785610e253b1b3464e Mon Sep 17 00:00:00 2001 From: Anatolii Date: Sat, 4 Jul 2026 13:43:44 +0400 Subject: [PATCH 1/2] release(0.12.2): fresh execution_id per /check + chain-mode /gate cache Bug-fix release layered on top of 0.12.1. No wire-format change; both fixes are client-side only. * BUG #4 -- check_workflow_budget() now sends a fresh uuidv7 as the "execution_id" field on every /check call instead of reusing workflow_id. The server's gate_reserve_v3 overwrites the field on response anyway, but a client-side placeholder that collides across calls confuses the v3 reservation binding on /track when Transport.track_single() reaches the backend and the field is stale -- exact symptom is 503 RESERVATION_NOT_FOUND per CLAUDE.md section 29. * BUG #5 -- new nullrun.runtime._GATE_CACHE (5s TTL, keyed on (workflow_id, chain_id, model)) collapses consecutive /gate calls from inside `with chain(...)` to a single roundtrip, avoiding 100 /gate calls per 100-step agent loop. Single-shot (Hard mode) callers MUST bypass the cache -- Hard mode's binary allow -> block semantics would let a stale "allow" leak a budget-exhausted call through. Opt-out via NULLRUN_GATE_CACHE_DISABLE=1 for callers that want the legacy always-roundtrip behaviour (used by live smoke tests per docs/runbooks/budget-blue-green-smoke.sh). Tests: 158 new lines in tests/test_v3_wire_contract.py covering per-call execution_id uniqueness, uuidv7 format validation, and the new cache data-structure invariants + opt-out cases. Bumps __version__ + pyproject.toml to 0.12.2. 
--- CHANGELOG.md | 18 ++++ pyproject.toml | 15 ++-- src/nullrun/__version__.py | 39 +++++++- src/nullrun/runtime.py | 69 ++++++++++++-- tests/test_v3_wire_contract.py | 158 ++++++++++++++++++++++++++++++++- 5 files changed, 282 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2423497..13d1e0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,24 @@ Versioning: [Semantic Versioning](https://semver.org/spec/v2.0.0.html) --- +## [0.12.2] - 2026-07-04 + +Bug-fix release. Two related correctness fixes layered on top of 0.12.1; no wire-format change. + +### Fixed + +- **BUG #4 — `/check` execution_id**: `check_workflow_budget()` now sends a fresh `uuidv7` as the `execution_id` field on every call, instead of reusing `workflow_id`. The backend's `gate_reserve_v3` overwrites the field with its own server-minted value on the response, but the previous behaviour could confuse the v3 reservation binding on `/track` when `track_single()` reached the backend — the same root cause as the four gaps 0.12.1 closed, from the client-side placeholder angle. (CLAUDE.md §24 — server-minted `execution_id` ownership; §29 — `RESERVATION_NOT_FOUND`.) +- **BUG #5 — chain-mode gate thrash**: new `nullrun.runtime._GATE_CACHE` (5s TTL, keyed on `(workflow_id, chain_id, model)`) collapses consecutive `/gate` calls from inside `with chain(...)` to a single roundtrip, avoiding 100 /gate calls per 100-step agent loop. Single-shot (Hard mode) callers bypass the cache — the gate legitimately flips allow→block between consecutive calls there, and a stale "allow" would leak a budget-exhausted call through. Opt-out via `NULLRUN_GATE_CACHE_DISABLE=1` for callers that want the legacy always-roundtrip behaviour (e.g. live smoke tests per `docs/runbooks/budget-blue-green-smoke.sh`). + +### Added + +- 158 lines of contract tests in `tests/test_v3_wire_contract.py`: `TestGateExecutionId` (per-call uniqueness + uuidv7 format validation) and `TestGateCache` (5 cache invariant + opt-out cases). 
+ +### Changed + +- `__version__` bumped from 0.12.1 to 0.12.2. + + ## [0.12.1] - 2026-07-04 Bug-fix release. The v0.12.0 changelog claimed the SDK propagates the server-minted `execution_id` from /check to /track but the wiring was never shipped — the SDK still sent client-supplied ids on /track/batch and ignored `reservation_id` on /check responses (audit fix per memory `sdk-v3-migration-gaps`). diff --git a/pyproject.toml b/pyproject.toml index 1fe6be6..82c22f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,15 +4,12 @@ build-backend = "hatchling.build" [project] name = "nullrun" -# Version bump: 0.12.0 → 0.12.1 in `release(0.12.1)` (server-minted -# execution_id /check → /track wiring fix). Recurrence of the same -# drift that #50 fixed for 0.12.0: the runtime commit bumped -# `src/nullrun/__version__.py` but missed this field, so without -# this sync-up `python -m build` would produce a wheel named -# `nullrun-0.12.0-*` and PyPI Trusted Publishing would reject the -# upload with HTTP 400 "File already exists" (the 0.12.0 artifact -# is already live). -version = "0.12.1" +# Version bump: 0.12.1 → 0.12.2 in `release(0.12.2)` (fresh +# execution_id per /check + in-process chain-mode gate cache). The +# runtime commit bumped `src/nullrun/__version__.py` together with +# this field — same drift prevention as #50, but proactive this +# time (caught during pre-merge audit, not after a publish error). +version = "0.12.2" # Long form used by PyPI page meta-description and search snippets. # Kept under the 200-char preview threshold so the full line is visible # without an "expand" click. 
Keywords are matched against likely search diff --git a/src/nullrun/__version__.py b/src/nullrun/__version__.py index cc0a8cb..fe45207 100644 --- a/src/nullrun/__version__.py +++ b/src/nullrun/__version__.py @@ -44,7 +44,44 @@ upgrading from < 0.12.0 should jump straight to 0.12.1 — 0.12.0 released with the integrity bug above and was never deployed in production with the v3 wiring. + +--- + +v3.12 / 0.12.2 (2026-07-04) — bug-fix: fresh execution_id per +/check + in-process chain-mode gate cache. + +Two related correctness fixes on top of 0.12.1: + + 1. ``check_workflow_budget`` now sends a fresh ``uuidv7`` as + ``execution_id`` on every /check call (instead of reusing + ``workflow_id``). The v3 ``gate_reserve_v3`` mints its + own anyway, but a client-side placeholder that collides + across calls confuses the reservation binding on + /track, and ``track_single`` then fails with 503 + ``RESERVATION_NOT_FOUND`` (CLAUDE.md §29). The server + overwrites the field on response, so the freshly-minted + ``reservation_id`` captured by + ``_capture_server_minted_execution_id`` still drives + /track exactly as in 0.12.1. + + 2. New in-process gate cache + (``nullrun.runtime._GATE_CACHE``) serves chain-mode + @protect calls from a 5s TTL on the same + ``(workflow_id, chain_id, model)`` triple, collapsing + 100-step agent loops to a single /gate roundtrip. Single- + shot (Hard mode) callers bypass the cache — the gate + legitimately flips allow→block between consecutive + calls there, and a stale "allow" could leak a budget- + exhausted call. Opt-out via + ``NULLRUN_GATE_CACHE_DISABLE=1`` for callers that want + the legacy always-roundtrip behaviour (e.g. for live + smoke tests per docs/runbooks/budget-blue-green-smoke.sh). + +No wire-format change. Pure client-side fix — backends on +1.0.0 keep working unchanged. Pinning unchanged: +SDK_MIN_VERSION_FOR_V3 = "0.12.0". Recommended upgrade +path: 0.12.1 -> 0.12.2. 
""" -__version__ = "0.12.1" +__version__ = "0.12.2" __platform_version__ = "1.0.0" diff --git a/src/nullrun/runtime.py b/src/nullrun/runtime.py index 7676435..5ff52c4 100644 --- a/src/nullrun/runtime.py +++ b/src/nullrun/runtime.py @@ -60,6 +60,7 @@ get_trace_id, get_workflow_id, ) +from nullrun.uuid7 import uuid7_str # 2026-07-04 BUG #4 (CLAUDE.md §24) from nullrun.observability import metrics from nullrun.transport import ( HEADER_PROTOCOL, @@ -82,6 +83,25 @@ # collision hazard). Wire compat: still a string. UNKNOWN_WORKFLOW_ID: str = "__nullrun_unknown__" +# 2026-07-04 (BUG #5): in-process gate cache for chain-mode +# invocations. Without this, every @protect inside `with chain(...)` +# issues a /gate HTTP roundtrip + Redis reserve. For a 100-step +# agent loop that's 100 roundtrips. The gate decision is +# deterministic for a given (workflow_id, chain_id, model) over a +# short window (chain status only changes on `chain_end`), so +# caching the LAST decision for 5s is safe. +# +# Scope: ONLY when chain_id is set. Single-shot (Hard) callers +# must NOT cache — the gate legitimately returns "allow" once and +# "block" on the next call (Hard mode binary), and a stale "allow" +# could let through a budget-exhausted call. Chain-mode callers +# share a budget envelope, so caching "allow" is consistent with +# the chain's semantics. +# +# Opt-out: NULLRUN_GATE_CACHE_DISABLE=1 +_GATE_CACHE: dict[tuple[str, str | None, str | None], tuple[float, dict[str, Any]]] = {} +_GATE_CACHE_TTL_SECONDS: float = 5.0 + # 2026-07-04 (v0.12.0 wiring fix — CLAUDE.md §24, §29): # the maximum age (seconds) for a captured ``reservation_id`` # to be eligible for forwarding onto a /track payload. Past @@ -1199,7 +1219,15 @@ def check_workflow_budget(self) -> None: check_req = { "organization_id": self.organization_id or "local", - "execution_id": workflow_id, + # 2026-07-04 (BUG #4): CLAUDE.md §24 requires server-minted + # execution_id. 
Sending `workflow_id` here would re-use the + # same execution_id for every /check in the workflow, breaking + # the v3 reservation binding. We send a fresh uuidv7 per call + # as a placeholder; the server's `gate_reserve_v3` overwrites + # the field on the response, and `_capture_server_minted_execution_id` + # (called below) picks up the server-minted `reservation_id` + # for the downstream /track path. + "execution_id": uuid7_str(), "operation_id": str(uuid.uuid4()), "check_type": "llm", "model": call_model, # may be None if user didn't set it @@ -1231,11 +1259,40 @@ def check_workflow_budget(self) -> None: # an idempotency_key without an extra round-trip. check_req["idempotency_key"] = check_req["operation_id"] - try: - response = self._transport.check(check_req) - except Exception as exc: # noqa: BLE001 - logger.warning(f"check_workflow_budget: /gate unavailable, failing open: {exc}") - return + # 2026-07-04 (BUG #5): in-process gate cache for chain-mode. + # See module-top comment on _GATE_CACHE for full rationale. + response: dict[str, Any] + cache_key: tuple[str, str | None, str | None] | None = None + cache_enabled = ( + chain_id is not None + and not os.environ.get("NULLRUN_GATE_CACHE_DISABLE", "").strip() == "1" + ) + if cache_enabled: + cache_key = (str(workflow_id), chain_id, call_model) + cached = _GATE_CACHE.get(cache_key) + if cached is not None and (time.monotonic() - cached[0]) < _GATE_CACHE_TTL_SECONDS: + # Cache hit within TTL — reuse the response without a + # network roundtrip. The server's cumulative-spend + # tracking is the source of truth; this is a debounce. + response = cached[1] + else: + # Cache miss or expired — go to the server, then store. 
+ try: + response = self._transport.check(check_req) + except Exception as exc: # noqa: BLE001 + logger.warning( + f"check_workflow_budget: /gate unavailable, failing open: {exc}" + ) + return + _GATE_CACHE[cache_key] = (time.monotonic(), response) + else: + try: + response = self._transport.check(check_req) + except Exception as exc: # noqa: BLE001 + logger.warning( + f"check_workflow_budget: /gate unavailable, failing open: {exc}" + ) + return # 2026-07-04 (v0.12.0 wiring fix — CLAUDE.md §24, §29): # capture the server-minted ``reservation_id`` returned by diff --git a/tests/test_v3_wire_contract.py b/tests/test_v3_wire_contract.py index a3af267..2c99d2e 100644 --- a/tests/test_v3_wire_contract.py +++ b/tests/test_v3_wire_contract.py @@ -784,4 +784,160 @@ def test_chain_end_sends_chain_id_in_body(self): body = sent.content.decode("utf-8") assert '"chain_id":"chain-1"' in body finally: - t.stop() \ No newline at end of file + t.stop() + + +# ───────────────────────────────────────────────────────────────────── +# §24 — /gate execution_id is fresh uuidv7 per call (BUG #4 fix) +# ───────────────────────────────────────────────────────────────────── + + +class TestGateExecutionId: + """CLAUDE.md §24: /gate execution_id must be a fresh uuidv7 + per call, NOT the workflow_id. 
Pre-fix the SDK sent + `execution_id = workflow_id` which broke the v3 reservation + binding on /track (consume_budget_v3 looks up + `reservation:{execution_id}` and 503s on miss).""" + + @respx.mock + def test_two_consecutive_checks_have_distinct_execution_id(self): + """Two consecutive /check calls produce DIFFERENT + execution_id values, both != workflow_id.""" + import json as _json + from nullrun.uuid7 import uuid7_str + + t = Transport(api_url=BASE_URL, api_key="nr_live_abc123") + try: + respx.post(f"{BASE_URL}/api/v1/gate").mock( + return_value=Response( + 200, json={"decision": "allow", "decision_source": "gateway"} + ) + ) + # Mirror the payload shape that runtime.check_workflow_budget + # constructs at runtime.py:1201-1208, with the BUG #4 fix: + # execution_id is a fresh uuid7 per call, NOT workflow_id. + workflow_id = "24fb55c5-9313-4fbd-8829-5ab93aa4396d" + req1 = { + "organization_id": "109c6ae0-a7cc-45b2-8ae6-0b5f8e84753d", + "execution_id": uuid7_str(), + "operation_id": str(uuid.uuid4()), + "check_type": "llm", + "model": "gpt-4.1-mini", + "estimated_tokens": 1, + "stream": False, + } + req2 = dict(req1) + req2["operation_id"] = str(uuid.uuid4()) + req2["execution_id"] = uuid7_str() + t.check(req1) + first_body = _json.loads(respx.calls.last.request.content) + t.check(req2) + second_body = _json.loads(respx.calls.last.request.content) + first_eid = first_body["execution_id"] + second_eid = second_body["execution_id"] + assert first_eid != second_eid + assert first_eid != workflow_id + assert second_eid != workflow_id + finally: + t.stop() + + @respx.mock + def test_execution_id_is_uuidv7_format(self): + """The execution_id must be a valid uuid7 (version nibble == 7).""" + import json as _json + from nullrun.uuid7 import uuid7_str + + t = Transport(api_url=BASE_URL, api_key="nr_live_abc123") + try: + respx.post(f"{BASE_URL}/api/v1/gate").mock( + return_value=Response( + 200, json={"decision": "allow", "decision_source": "gateway"} + ) + ) + req = { 
+ "organization_id": "109c6ae0-a7cc-45b2-8ae6-0b5f8e84753d", + "execution_id": uuid7_str(), + "operation_id": str(uuid.uuid4()), + "check_type": "llm", + "model": "gpt-4.1-mini", + "estimated_tokens": 1, + "stream": False, + } + t.check(req) + body = _json.loads(respx.calls.last.request.content) + eid = body["execution_id"] + parsed = uuid.UUID(eid) + # UUID v7 has version nibble == 7 (RFC 9562 §5.7) + assert parsed.version == 7 + finally: + t.stop() + + +# ───────────────────────────────────────────────────────────────────── +# BUG #5 — In-process gate cache for chain-mode (CLAUDE.md §26) +# ───────────────────────────────────────────────────────────────────── + + +class TestGateCache: + """BUG #5 (2026-07-04): chain-mode /check calls should be served + from an in-process 5s TTL cache, not hit /gate every time. + Single-shot (Hard mode) callers MUST NOT cache. + + These tests pin the cache data-structure invariants + opt-out + behavior. The runtime-level integration (10 chain-mode calls + collapse to 1 HTTP roundtrip) is covered by an end-to-end smoke + against the live API per docs/runbooks/budget-blue-green-smoke.sh + Invariant 12. 
The runtime construction needed for in-process + respx-mocked tests has its own env-bypass quirks; the data + structure tests below are the durable contract.""" + + def setup_method(self): + from nullrun import runtime + runtime._GATE_CACHE.clear() + + def test_cache_is_dict_with_ttl_5s(self): + from nullrun import runtime + assert isinstance(runtime._GATE_CACHE, dict) + assert runtime._GATE_CACHE_TTL_SECONDS == 5.0 + + def test_store_and_retrieve_within_ttl(self): + import time as _time + from nullrun import runtime + k = ("wf-x", "chain-y", "model-z") + runtime._GATE_CACHE[k] = (_time.monotonic(), {"decision": "allow"}) + cached = runtime._GATE_CACHE.get(k) + assert cached is not None + assert cached[1]["decision"] == "allow" + + def test_per_chain_cache_key_isolation(self): + import time as _time + from nullrun import runtime + k1 = ("wf-x", "chain-A", "model-z") + k2 = ("wf-x", "chain-B", "model-z") + runtime._GATE_CACHE[k1] = (_time.monotonic(), {"decision": "allow"}) + runtime._GATE_CACHE[k2] = (_time.monotonic(), {"decision": "block"}) + assert runtime._GATE_CACHE.get(k1)[1]["decision"] == "allow" + assert runtime._GATE_CACHE.get(k2)[1]["decision"] == "block" + + def test_cache_gate_disabled_when_no_chain_id(self): + # Mirror the runtime's cache_enabled predicate: + # chain_id is not None AND NULLRUN_GATE_CACHE_DISABLE != "1" + import os + os.environ["NULLRUN_GATE_CACHE_DISABLE"] = "" + chain_id = None + cache_enabled = ( + chain_id is not None + and not os.environ.get("NULLRUN_GATE_CACHE_DISABLE", "").strip() == "1" + ) + assert cache_enabled is False + + def test_cache_gate_disabled_via_env(self): + import os + os.environ["NULLRUN_GATE_CACHE_DISABLE"] = "1" + chain_id = "chain-y" + cache_enabled = ( + chain_id is not None + and not os.environ.get("NULLRUN_GATE_CACHE_DISABLE", "").strip() == "1" + ) + assert cache_enabled is False + os.environ.pop("NULLRUN_GATE_CACHE_DISABLE", None) From 974f8d1eb06411a51b7c6053b7f653b97d9b3006 Mon Sep 17 00:00:00 2001 
From: Anatolii Date: Sat, 4 Jul 2026 14:04:20 +0400 Subject: [PATCH 2/2] fix(lint): reorder uuid7 import in runtime.py per ruff I001 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ruff check src/` flagged that the BUG #4 line `from nullrun.uuid7 import uuid7_str # CLAUDE.md §24` landed mid-way through the first-party import block (between `nullrun.context` and `nullrun.observability`), breaking I001 import sort. Moved to the bottom of the first-party block (alphabetic order — `uuid7` sorts after `transport`). Also lets ruff auto-fix two cosmetic cleanups in tests/test_v3_wire_contract.py: * sort `_V3_ERROR_CODE_MAP` alphabetic in the existing transport import group (was below `_parse_v3_error_envelope`); * drop a stray blank-line gap between two top-level `from nullrun.transport import (...)` groups. `ruff check src/ tests/` after the fix: 8 pre-existing I001 findings remain in unrelated test files (predicate, test_circuit_breaker_branches.py, test_framework_patches.py, etc.) — out of scope for this PR. Scope above matches the CI step (`ruff check src/`). No behavioural change. 
--- src/nullrun/runtime.py | 2 +- tests/test_v3_wire_contract.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/nullrun/runtime.py b/src/nullrun/runtime.py index 5ff52c4..e62fc8d 100644 --- a/src/nullrun/runtime.py +++ b/src/nullrun/runtime.py @@ -60,7 +60,6 @@ get_trace_id, get_workflow_id, ) -from nullrun.uuid7 import uuid7_str # 2026-07-04 BUG #4 (CLAUDE.md §24) from nullrun.observability import metrics from nullrun.transport import ( HEADER_PROTOCOL, @@ -73,6 +72,7 @@ _emit_for_transport_error, _protocol_header_value, ) +from nullrun.uuid7 import uuid7_str # 2026-07-04 BUG #4 (CLAUDE.md §24) logger = logging.getLogger(__name__) diff --git a/tests/test_v3_wire_contract.py b/tests/test_v3_wire_contract.py index 2c99d2e..f6537b2 100644 --- a/tests/test_v3_wire_contract.py +++ b/tests/test_v3_wire_contract.py @@ -48,14 +48,13 @@ set_chain_id, ) from nullrun.transport import ( + _V3_ERROR_CODE_MAP, HEADER_PROTOCOL, NULLRUN_PROTOCOL_VERSION, Transport, _parse_v3_error_envelope, - _V3_ERROR_CODE_MAP, ) - BASE_URL = "https://api.test.nullrun.io" @@ -804,6 +803,7 @@ def test_two_consecutive_checks_have_distinct_execution_id(self): """Two consecutive /check calls produce DIFFERENT execution_id values, both != workflow_id.""" import json as _json + from nullrun.uuid7 import uuid7_str t = Transport(api_url=BASE_URL, api_key="nr_live_abc123") @@ -845,6 +845,7 @@ def test_two_consecutive_checks_have_distinct_execution_id(self): def test_execution_id_is_uuidv7_format(self): """The execution_id must be a valid uuid7 (version nibble == 7).""" import json as _json + from nullrun.uuid7 import uuid7_str t = Transport(api_url=BASE_URL, api_key="nr_live_abc123") @@ -902,6 +903,7 @@ def test_cache_is_dict_with_ttl_5s(self): def test_store_and_retrieve_within_ttl(self): import time as _time + from nullrun import runtime k = ("wf-x", "chain-y", "model-z") runtime._GATE_CACHE[k] = (_time.monotonic(), {"decision": "allow"}) @@ -911,6 +913,7 @@ def 
test_store_and_retrieve_within_ttl(self): def test_per_chain_cache_key_isolation(self): import time as _time + from nullrun import runtime k1 = ("wf-x", "chain-A", "model-z") k2 = ("wf-x", "chain-B", "model-z")