Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,24 @@ Versioning: [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
---


## [0.12.2] - 2026-07-04

Bug-fix release. Two related correctness fixes layered on top of 0.12.1; no wire-format change.

### Fixed

- **BUG #4 — `/check` execution_id**: `check_workflow_budget()` now sends a fresh `uuidv7` as the `execution_id` field on every call, instead of reusing `workflow_id`. The backend's `gate_reserve_v3` overwrites the field with its own server-minted value on the response, but the previous behaviour could confuse the v3 reservation binding on `/track` when `track_single()` reached the backend — the same root cause as the four gaps 0.12.1 closed, from the client-side placeholder angle. (CLAUDE.md §24 `execution_id` ownership; §29.)
- **BUG #5 — chain-mode gate thrash**: new `nullrun.runtime._GATE_CACHE` (5s TTL, keyed on `(workflow_id, chain_id, model)`) collapses consecutive `/gate` calls from inside `with chain(...)` to a single roundtrip per 5s TTL window, instead of issuing 100 `/gate` calls for a 100-step agent loop. Single-shot (Hard mode) callers bypass the cache — the gate legitimately flips allow→block between consecutive calls there, and a stale "allow" would leak a budget-exhausted call through. Opt-out via `NULLRUN_GATE_CACHE_DISABLE=1` for callers that want the legacy always-roundtrip behaviour (e.g. live smoke tests per `docs/runbooks/budget-blue-green-smoke.sh`).

### Added

- 158 lines of contract tests in `tests/test_v3_wire_contract.py`: `TestGateExecutionId` (per-call uniqueness + uuidv7 format validation) and `TestGateCache` (5 cache invariant + opt-out cases).

### Changed

- `__version__` bumped from 0.12.1 to 0.12.2.


## [0.12.1] - 2026-07-04

Bug-fix release. The v0.12.0 changelog claimed the SDK propagates the server-minted `execution_id` from /check to /track but the wiring was never shipped — the SDK still sent client-supplied ids on /track/batch and ignored `reservation_id` on /check responses (audit fix per memory `sdk-v3-migration-gaps`).
Expand Down
15 changes: 6 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,12 @@ build-backend = "hatchling.build"

[project]
name = "nullrun"
# Version bump: 0.12.0 → 0.12.1 in `release(0.12.1)` (server-minted
# execution_id /check → /track wiring fix). Recurrence of the same
# drift that #50 fixed for 0.12.0: the runtime commit bumped
# `src/nullrun/__version__.py` but missed this field, so without
# this sync-up `python -m build` would produce a wheel named
# `nullrun-0.12.0-*` and PyPI Trusted Publishing would reject the
# upload with HTTP 400 "File already exists" (the 0.12.0 artifact
# is already live).
version = "0.12.1"
# Version bump: 0.12.1 → 0.12.2 in `release(0.12.2)` (fresh
# execution_id per /check + in-process chain-mode gate cache). The
# runtime commit bumped `src/nullrun/__version__.py` together with
# this field — same drift prevention as #50, but proactive this
# time (caught during pre-merge audit, not after a publish error).
version = "0.12.2"
# Long form used by PyPI page meta-description and search snippets.
# Kept under the 200-char preview threshold so the full line is visible
# without an "expand" click. Keywords are matched against likely search
Expand Down
39 changes: 38 additions & 1 deletion src/nullrun/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,44 @@
upgrading from < 0.12.0 should jump straight to 0.12.1 — 0.12.0
released with the integrity bug above and was never deployed
in production with the v3 wiring.

---

v3.12 / 0.12.2 (2026-07-04) — bug-fix: fresh execution_id per
/check + in-process chain-mode gate cache.

Two related correctness fixes on top of 0.12.1:

1. ``check_workflow_budget`` now sends a fresh ``uuidv7`` as
``execution_id`` on every /check call (instead of reusing
``workflow_id``). The v3 ``gate_reserve_v3`` mints its
own anyway, but a client-side placeholder that collides
across calls can confuse the reservation binding on
/track, which surfaces as ``track_single`` returning 503
``RESERVATION_NOT_FOUND`` (CLAUDE.md §29). The server
overwrites the field on response, so the freshly-minted
``reservation_id`` captured by
``_capture_server_minted_execution_id`` still drives
/track exactly as in 0.12.1.

2. New in-process gate cache
(``nullrun.runtime._GATE_CACHE``) serves chain-mode
@protect calls from a 5s-TTL cache keyed on the
``(workflow_id, chain_id, model)`` triple, collapsing
100-step agent loops to one /gate roundtrip per TTL window. Single-
shot (Hard mode) callers bypass the cache — the gate
legitimately flips allow→block between consecutive
calls there, and a stale "allow" could leak a budget-
exhausted call. Opt-out via
``NULLRUN_GATE_CACHE_DISABLE=1`` for callers that want
the legacy always-roundtrip behaviour (e.g. for live
smoke tests per docs/runbooks/budget-blue-green-smoke.sh).

No wire-format change. Pure client-side fix — backends on
1.0.0 keep working unchanged. Pinning unchanged:
SDK_MIN_VERSION_FOR_V3 = "0.12.0". Recommended upgrade
path: 0.12.1 -> 0.12.2.
"""

__version__ = "0.12.1"
__version__ = "0.12.2"
__platform_version__ = "1.0.0"
69 changes: 63 additions & 6 deletions src/nullrun/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
_emit_for_transport_error,
_protocol_header_value,
)
from nullrun.uuid7 import uuid7_str # 2026-07-04 BUG #4 (CLAUDE.md §24)

logger = logging.getLogger(__name__)

Expand All @@ -82,6 +83,25 @@
# collision hazard). Wire compat: still a string.
UNKNOWN_WORKFLOW_ID: str = "__nullrun_unknown__"

# 2026-07-04 (BUG #5): in-process gate cache for chain-mode
# invocations. Without this, every @protect inside `with chain(...)`
# issues a /gate HTTP roundtrip + Redis reserve. For a 100-step
# agent loop that's 100 roundtrips. The gate decision is
# deterministic for a given (workflow_id, chain_id, model) over a
# short window (chain status only changes on `chain_end`), so
# caching the LAST decision for 5s is safe.
#
# Scope: ONLY when chain_id is set. Single-shot (Hard) callers
# must NOT cache — the gate legitimately returns "allow" once and
# "block" on the next call (Hard mode binary), and a stale "allow"
# could let through a budget-exhausted call. Chain-mode callers
# share a budget envelope, so caching "allow" is consistent with
# the chain's semantics.
#
# Opt-out: NULLRUN_GATE_CACHE_DISABLE=1
#
# Layout: the key is (workflow_id, chain_id, model); the value is
# (time.monotonic() reading taken when the entry was stored, the
# /gate response dict). An entry is served only while its age is
# below _GATE_CACHE_TTL_SECONDS; a cache hit does not refresh the
# stored timestamp, so the window is fixed, not sliding.
#
# NOTE(review): the read/write sites visible in check_workflow_budget
# only insert or overwrite entries — no eviction of expired keys and
# no lock is visible there. Confirm that the key space stays bounded
# and that concurrent access is not a concern, or add both.
_GATE_CACHE: dict[tuple[str, str | None, str | None], tuple[float, dict[str, Any]]] = {}
_GATE_CACHE_TTL_SECONDS: float = 5.0

# 2026-07-04 (v0.12.0 wiring fix — CLAUDE.md §24, §29):
# the maximum age (seconds) for a captured ``reservation_id``
# to be eligible for forwarding onto a /track payload. Past
Expand Down Expand Up @@ -1199,7 +1219,15 @@ def check_workflow_budget(self) -> None:

check_req = {
"organization_id": self.organization_id or "local",
"execution_id": workflow_id,
# 2026-07-04 (BUG #4): CLAUDE.md §24 requires server-minted
# execution_id. Sending `workflow_id` here would re-use the
# same execution_id for every /check in the workflow, breaking
# the v3 reservation binding. We send a fresh uuidv7 per call
# as a placeholder; the server's `gate_reserve_v3` overwrites
# the field on the response, and `_capture_server_minted_execution_id`
# (called below) picks up the server-minted `reservation_id`
# for the downstream /track path.
"execution_id": uuid7_str(),
"operation_id": str(uuid.uuid4()),
"check_type": "llm",
"model": call_model, # may be None if user didn't set it
Expand Down Expand Up @@ -1231,11 +1259,40 @@ def check_workflow_budget(self) -> None:
# an idempotency_key without an extra round-trip.
check_req["idempotency_key"] = check_req["operation_id"]

try:
response = self._transport.check(check_req)
except Exception as exc: # noqa: BLE001
logger.warning(f"check_workflow_budget: /gate unavailable, failing open: {exc}")
return
# 2026-07-04 (BUG #5): in-process gate cache for chain-mode.
# See module-top comment on _GATE_CACHE for full rationale.
response: dict[str, Any]
cache_key: tuple[str, str | None, str | None] | None = None
cache_enabled = (
chain_id is not None
and not os.environ.get("NULLRUN_GATE_CACHE_DISABLE", "").strip() == "1"
)
if cache_enabled:
cache_key = (str(workflow_id), chain_id, call_model)
cached = _GATE_CACHE.get(cache_key)
if cached is not None and (time.monotonic() - cached[0]) < _GATE_CACHE_TTL_SECONDS:
# Cache hit within TTL — reuse the response without a
# network roundtrip. The server's cumulative-spend
# tracking is the source of truth; this is a debounce.
response = cached[1]
else:
# Cache miss or expired — go to the server, then store.
try:
response = self._transport.check(check_req)
except Exception as exc: # noqa: BLE001
logger.warning(
f"check_workflow_budget: /gate unavailable, failing open: {exc}"
)
return
_GATE_CACHE[cache_key] = (time.monotonic(), response)
else:
try:
response = self._transport.check(check_req)
except Exception as exc: # noqa: BLE001
logger.warning(
f"check_workflow_budget: /gate unavailable, failing open: {exc}"
)
return

# 2026-07-04 (v0.12.0 wiring fix — CLAUDE.md §24, §29):
# capture the server-minted ``reservation_id`` returned by
Expand Down
165 changes: 162 additions & 3 deletions tests/test_v3_wire_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,13 @@
set_chain_id,
)
from nullrun.transport import (
_V3_ERROR_CODE_MAP,
HEADER_PROTOCOL,
NULLRUN_PROTOCOL_VERSION,
Transport,
_parse_v3_error_envelope,
_V3_ERROR_CODE_MAP,
)


BASE_URL = "https://api.test.nullrun.io"


Expand Down Expand Up @@ -784,4 +783,164 @@ def test_chain_end_sends_chain_id_in_body(self):
body = sent.content.decode("utf-8")
assert '"chain_id":"chain-1"' in body
finally:
t.stop()
t.stop()


# ─────────────────────────────────────────────────────────────────────
# §24 — /gate execution_id is fresh uuidv7 per call (BUG #4 fix)
# ─────────────────────────────────────────────────────────────────────


class TestGateExecutionId:
    """Pin the /gate ``execution_id`` contract (CLAUDE.md §24, BUG #4).

    Every /check must carry a freshly minted uuidv7 as ``execution_id``
    instead of the ``workflow_id``. Before the fix the SDK sent
    ``execution_id = workflow_id``, which broke the v3 reservation
    binding on /track (consume_budget_v3 looks up
    ``reservation:{execution_id}`` and 503s on miss).
    """

    @staticmethod
    def _gate_payload():
        """Return a /gate body shaped like the one built by
        ``runtime.check_workflow_budget``, with freshly minted ids."""
        from nullrun.uuid7 import uuid7_str

        return {
            "organization_id": "109c6ae0-a7cc-45b2-8ae6-0b5f8e84753d",
            "execution_id": uuid7_str(),
            "operation_id": str(uuid.uuid4()),
            "check_type": "llm",
            "model": "gpt-4.1-mini",
            "estimated_tokens": 1,
            "stream": False,
        }

    @staticmethod
    def _mock_gate_allow():
        """Register a respx route that answers /gate with an "allow"."""
        respx.post(f"{BASE_URL}/api/v1/gate").mock(
            return_value=Response(
                200, json={"decision": "allow", "decision_source": "gateway"}
            )
        )

    @staticmethod
    def _last_sent_body():
        """Decode the JSON body of the most recent mocked request."""
        import json as _json

        return _json.loads(respx.calls.last.request.content)

    @respx.mock
    def test_two_consecutive_checks_have_distinct_execution_id(self):
        """Back-to-back /check calls send different ``execution_id``
        values, and neither of them equals the workflow_id."""
        workflow_id = "24fb55c5-9313-4fbd-8829-5ab93aa4396d"
        transport = Transport(api_url=BASE_URL, api_key="nr_live_abc123")
        try:
            self._mock_gate_allow()
            # Both payloads are built up front, then sent one at a time
            # so each request body can be read back right after its call.
            payloads = (self._gate_payload(), self._gate_payload())
            sent_ids = []
            for payload in payloads:
                transport.check(payload)
                sent_ids.append(self._last_sent_body()["execution_id"])
            first_eid, second_eid = sent_ids
            assert first_eid != second_eid
            assert workflow_id not in sent_ids
        finally:
            transport.stop()

    @respx.mock
    def test_execution_id_is_uuidv7_format(self):
        """The ``execution_id`` on the wire parses as a UUID whose
        version nibble is 7."""
        transport = Transport(api_url=BASE_URL, api_key="nr_live_abc123")
        try:
            self._mock_gate_allow()
            transport.check(self._gate_payload())
            sent_eid = self._last_sent_body()["execution_id"]
            # UUID v7 carries version nibble 7 (RFC 9562 §5.7).
            assert uuid.UUID(sent_eid).version == 7
        finally:
            transport.stop()


# ─────────────────────────────────────────────────────────────────────
# BUG #5 — In-process gate cache for chain-mode (CLAUDE.md §26)
# ─────────────────────────────────────────────────────────────────────


class TestGateCache:
    """BUG #5 (2026-07-04): chain-mode /check calls should be served
    from an in-process 5s TTL cache, not hit /gate every time.
    Single-shot (Hard mode) callers MUST NOT cache.

    These tests pin the cache data-structure invariants + opt-out
    behavior. The runtime-level integration (10 chain-mode calls
    collapse to 1 HTTP roundtrip) is covered by an end-to-end smoke
    against the live API per docs/runbooks/budget-blue-green-smoke.sh
    Invariant 12. The runtime construction needed for in-process
    respx-mocked tests has its own env-bypass quirks; the data
    structure tests below are the durable contract.

    Tests that touch ``NULLRUN_GATE_CACHE_DISABLE`` patch the
    environment inside a context manager, so the variable's prior
    state (set or unset) is restored even when an assertion fails.
    """

    def setup_method(self):
        """Start every test from an empty module-level gate cache."""
        from nullrun import runtime

        runtime._GATE_CACHE.clear()

    @staticmethod
    def _cache_enabled(chain_id):
        """Mirror the runtime's ``cache_enabled`` predicate.

        The cache applies only when a chain_id is set AND
        ``NULLRUN_GATE_CACHE_DISABLE`` (whitespace-stripped) is not "1".
        """
        import os

        return (
            chain_id is not None
            and os.environ.get("NULLRUN_GATE_CACHE_DISABLE", "").strip() != "1"
        )

    def test_cache_is_dict_with_ttl_5s(self):
        """The cache is a plain dict and the TTL constant is 5 seconds."""
        from nullrun import runtime

        assert isinstance(runtime._GATE_CACHE, dict)
        assert runtime._GATE_CACHE_TTL_SECONDS == 5.0

    def test_store_and_retrieve_within_ttl(self):
        """A just-stored entry is retrievable and still inside the TTL."""
        import time as _time

        from nullrun import runtime

        k = ("wf-x", "chain-y", "model-z")
        runtime._GATE_CACHE[k] = (_time.monotonic(), {"decision": "allow"})
        cached = runtime._GATE_CACHE.get(k)
        assert cached is not None
        assert cached[1]["decision"] == "allow"
        # Same freshness test the runtime applies before serving a hit.
        assert (_time.monotonic() - cached[0]) < runtime._GATE_CACHE_TTL_SECONDS

    def test_per_chain_cache_key_isolation(self):
        """Entries that differ only in chain_id do not shadow each other."""
        import time as _time

        from nullrun import runtime

        k1 = ("wf-x", "chain-A", "model-z")
        k2 = ("wf-x", "chain-B", "model-z")
        runtime._GATE_CACHE[k1] = (_time.monotonic(), {"decision": "allow"})
        runtime._GATE_CACHE[k2] = (_time.monotonic(), {"decision": "block"})
        assert runtime._GATE_CACHE[k1][1]["decision"] == "allow"
        assert runtime._GATE_CACHE[k2][1]["decision"] == "block"

    def test_cache_gate_disabled_when_no_chain_id(self):
        """Without a chain_id the cache is off even if the opt-out
        variable is not "1"."""
        import os
        from unittest import mock

        # patch.dict restores os.environ on exit, so the empty value set
        # here cannot leak into later tests.
        with mock.patch.dict(os.environ, {"NULLRUN_GATE_CACHE_DISABLE": ""}):
            assert self._cache_enabled(None) is False

    def test_cache_gate_disabled_via_env(self):
        """``NULLRUN_GATE_CACHE_DISABLE=1`` turns the cache off even
        when a chain_id is present."""
        import os
        from unittest import mock

        with mock.patch.dict(os.environ, {"NULLRUN_GATE_CACHE_DISABLE": "1"}):
            assert self._cache_enabled("chain-y") is False
Loading