From ebe5447d0fac27e3b5b12c97b37fbf9494103df3 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 14:12:08 -0700 Subject: [PATCH 1/6] feat(providers): prompt caching for Anthropic + Azure-Anthropic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark the static request prefix (system prompt + tools) with an ephemeral cache_control breakpoint so repeated calls — agent tool-loops and multi-turn — reuse the cached prefix (~90% cheaper cached input + lower latency). Azure- Anthropic inherits this via the shared core. - New providers/prompt-cache.ts gate: only caches when the static prefix is large enough to be cacheable AND likely reused (tools present, or a large system prompt), so a one-shot tool-less call never pays the cache-write surcharge. Kill switch: PROMPT_CACHE_DISABLED=true. - anthropic/core.ts: convert system string -> a cached text block (after the structured-output concat, which assumes a string) and tag the last tool. Uses 2 of Anthropic's 4 breakpoints; the tool-loop reuses the tagged payload. - Outputs are unchanged; cost accounting already reads cache_read/creation tokens (buildAnthropicSegmentTokens), so usage stays accurate. Matches the AI SDK / LangChain / Spring AI convention (explicit breakpoints for Claude; automatic for OpenAI/Gemini). Bedrock + OpenRouter to follow (they need cache-token accounting alongside). 
--- apps/sim/providers/anthropic/core.ts | 22 ++++++++++ apps/sim/providers/prompt-cache.test.ts | 58 +++++++++++++++++++++++++ apps/sim/providers/prompt-cache.ts | 55 +++++++++++++++++++++++ 3 files changed, 135 insertions(+) create mode 100644 apps/sim/providers/prompt-cache.test.ts create mode 100644 apps/sim/providers/prompt-cache.ts diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts index 57056e6acc..292eb132b6 100644 --- a/apps/sim/providers/anthropic/core.ts +++ b/apps/sim/providers/anthropic/core.ts @@ -16,6 +16,7 @@ import { supportsNativeStructuredOutputs, supportsTemperature, } from '@/providers/models' +import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' import { createStreamingExecution } from '@/providers/streaming-execution' import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter' import { enrichLastModelSegment } from '@/providers/trace-enrichment' @@ -324,6 +325,20 @@ export async function executeAnthropicProviderRequest( } } + // Prompt caching: mark the static prefix (system + tools) with an ephemeral + // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it. + // Must run after the structured-output block above, which assumes `system` is + // still a string. Tools are tagged at their assignment below. + const cacheStaticPrefix = shouldCacheStaticPrefix({ + systemPrompt: typeof payload.system === 'string' ? payload.system : '', + hasTools: !!anthropicTools?.length, + toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0, + }) + + if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) { + payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }] + } + // Add extended thinking configuration if supported and requested // The 'none' sentinel means "disable thinking" — skip configuration entirely. 
if (request.thinkingLevel && request.thinkingLevel !== 'none') { @@ -366,6 +381,13 @@ export async function executeAnthropicProviderRequest( } if (anthropicTools?.length) { + if (cacheStaticPrefix) { + const lastIndex = anthropicTools.length - 1 + anthropicTools[lastIndex] = { + ...anthropicTools[lastIndex], + cache_control: { type: 'ephemeral' }, + } + } payload.tools = anthropicTools // Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with // thinking. Only auto and none are supported when thinking is enabled. diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts new file mode 100644 index 0000000000..8999a2a252 --- /dev/null +++ b/apps/sim/providers/prompt-cache.test.ts @@ -0,0 +1,58 @@ +/** + * @vitest-environment node + */ +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' + +const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate +const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate + +describe('shouldCacheStaticPrefix', () => { + const original = process.env.PROMPT_CACHE_DISABLED + + beforeEach(() => { + process.env.PROMPT_CACHE_DISABLED = undefined + }) + + afterEach(() => { + process.env.PROMPT_CACHE_DISABLED = original + }) + + it('caches a large system prompt that has tools (agent loop)', () => { + expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(true) + }) + + it('caches a large system prompt even without tools', () => { + expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: false })).toBe(true) + }) + + it('reaches the threshold via tools when the system prompt alone is below it', () => { + // Small system + large serialized tools clears the combined threshold, and + // tools imply reuse, so it should cache. 
+ expect( + shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: true, toolsApproxChars: 8_000 }) + ).toBe(true) + }) + + it('does NOT cache a small, tool-less prompt (one-shot write surcharge avoided)', () => { + expect(shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: false })).toBe(false) + }) + + it('does NOT cache a small system even with tools when the combined prefix is below threshold', () => { + expect( + shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: true, toolsApproxChars: 400 }) + ).toBe(false) + }) + + it('does NOT cache when there is no system prompt', () => { + expect( + shouldCacheStaticPrefix({ systemPrompt: '', hasTools: true, toolsApproxChars: 8_000 }) + ).toBe(false) + expect(shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })).toBe(false) + }) + + it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => { + process.env.PROMPT_CACHE_DISABLED = 'true' + expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false) + }) +}) diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts new file mode 100644 index 0000000000..21375c39ca --- /dev/null +++ b/apps/sim/providers/prompt-cache.ts @@ -0,0 +1,55 @@ +import { getEnv, isTruthy } from '@/lib/core/config/env' + +/** + * Minimum estimated static-prefix size (system + tool definitions) before it is + * worth marking a prompt-cache breakpoint. This is a rough lower bound across + * Claude models (some require more); below it, providers silently skip caching + * anyway, so this only avoids spending a breakpoint on a trivially small prefix. + */ +const MIN_CACHEABLE_PREFIX_TOKENS = 1024 + +/** Rough token estimate (~4 chars/token) — fast and good enough for a gate. 
*/ +function estimateTokens(text: string): number { + return Math.ceil(text.length / 4) +} + +/** + * Decides whether to inject prompt-cache breakpoints on the static prefix + * (system prompt + tool definitions) for providers that require explicit cache + * control (Anthropic, Bedrock, and Anthropic models via OpenRouter). + * + * Caching only pays off when the prefix is large enough to be cacheable AND is + * actually re-read: agent tool-loops re-send the prefix on every iteration, and + * a large system prompt is typically reused across runs within the cache TTL. + * A small, tool-less prompt is intentionally skipped so a one-shot call never + * pays the cache-write surcharge for a prefix that is never read back. + * + * Set `PROMPT_CACHE_DISABLED=true` to turn this off globally (kill switch). + */ +export function shouldCacheStaticPrefix(params: { + systemPrompt: string | null | undefined + hasTools: boolean + toolsApproxChars?: number +}): boolean { + if (isTruthy(getEnv('PROMPT_CACHE_DISABLED'))) { + return false + } + + const system = params.systemPrompt ?? '' + if (!system) { + return false + } + + const systemTokens = estimateTokens(system) + const toolTokens = params.toolsApproxChars ? Math.ceil(params.toolsApproxChars / 4) : 0 + const prefixTokens = systemTokens + toolTokens + + if (prefixTokens < MIN_CACHEABLE_PREFIX_TOKENS) { + return false + } + + // Tools imply an agent loop (the prefix is re-read each iteration). Without + // tools, only cache when the system prompt alone is large enough to be worth + // the write on its own. 
+ return params.hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS +} From 3a449361f1228fc5e11ff8a304cbdeccec0aeccf Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 14:55:30 -0700 Subject: [PATCH 2/6] fix(providers): cache gate uses request system prompt; test uses vi.stubEnv - anthropic/core.ts: gate on request.systemPrompt instead of payload.system, so the no-messages path (where the system text is relocated into a user message and payload.system is blanked) still caches the tools prefix. (Cursor review) - prompt-cache.test.ts: manage the kill-switch env via vi.stubEnv/unstubAllEnvs instead of assigning undefined (which coerces to "undefined" and leaks across workers). Addresses the Greptile finding while satisfying biome's noDelete rule. --- apps/sim/providers/anthropic/core.ts | 6 +++++- apps/sim/providers/prompt-cache.test.ts | 13 +++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts index 292eb132b6..717b9f93dc 100644 --- a/apps/sim/providers/anthropic/core.ts +++ b/apps/sim/providers/anthropic/core.ts @@ -329,8 +329,12 @@ export async function executeAnthropicProviderRequest( // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it. // Must run after the structured-output block above, which assumes `system` is // still a string. Tools are tagged at their assignment below. + // Gate on the original request system prompt, not payload.system: when there + // are no context/chat messages the system text is relocated into a user + // message and payload.system is blanked (see above), but the prefix is still + // worth caching (the tools, at least). const cacheStaticPrefix = shouldCacheStaticPrefix({ - systemPrompt: typeof payload.system === 'string' ? payload.system : '', + systemPrompt: request.systemPrompt, hasTools: !!anthropicTools?.length, toolsApproxChars: anthropicTools ? 
JSON.stringify(anthropicTools).length : 0, }) diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts index 8999a2a252..18e59f6b81 100644 --- a/apps/sim/providers/prompt-cache.test.ts +++ b/apps/sim/providers/prompt-cache.test.ts @@ -1,21 +1,22 @@ /** * @vitest-environment node */ -import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate describe('shouldCacheStaticPrefix', () => { - const original = process.env.PROMPT_CACHE_DISABLED - + // vi.stubEnv cleanly sets/restores the kill switch without `delete` (which + // biome rewrites) or assigning `undefined` (which coerces to the string + // "undefined" and leaks to other tests in the worker). beforeEach(() => { - process.env.PROMPT_CACHE_DISABLED = undefined + vi.stubEnv('PROMPT_CACHE_DISABLED', '') }) afterEach(() => { - process.env.PROMPT_CACHE_DISABLED = original + vi.unstubAllEnvs() }) it('caches a large system prompt that has tools (agent loop)', () => { @@ -52,7 +53,7 @@ describe('shouldCacheStaticPrefix', () => { }) it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => { - process.env.PROMPT_CACHE_DISABLED = 'true' + vi.stubEnv('PROMPT_CACHE_DISABLED', 'true') expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false) }) }) From 3855e0424874cc958dae52f30e366a0404c1a7e9 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 15:13:52 -0700 Subject: [PATCH 3/6] refactor(providers): always-on prompt caching via a directly-tested helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove the PROMPT_CACHE_DISABLED kill switch — prompt caching is always on. 
- Extract the Anthropic tagging into applyAnthropicPromptCache(payload, tools, systemPrompt) in anthropic/utils.ts: one place that gates and mutates the system block + last tool, replacing the two inline blocks in core.ts. - Add direct unit tests for the helper (system→cached block, last-tool tagged, relocated/blanked-system still tags tools, below-threshold and tool-less cases untouched) so the actual payload mutation is verified, not just the gate. No behavior change to outputs; verified on vitest 4.1.8 (CI's version). --- apps/sim/providers/anthropic/core.ts | 27 ++------ apps/sim/providers/anthropic/utils.test.ts | 76 ++++++++++++++++++++++ apps/sim/providers/anthropic/utils.ts | 43 ++++++++++++ apps/sim/providers/prompt-cache.test.ts | 18 +---- apps/sim/providers/prompt-cache.ts | 8 --- 5 files changed, 124 insertions(+), 48 deletions(-) create mode 100644 apps/sim/providers/anthropic/utils.test.ts diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts index 717b9f93dc..86b1247727 100644 --- a/apps/sim/providers/anthropic/core.ts +++ b/apps/sim/providers/anthropic/core.ts @@ -6,6 +6,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors' import type { BlockTokens, IterationToolCall, StreamingExecution } from '@/executor/types' import { MAX_TOOL_ITERATIONS } from '@/providers' import { + applyAnthropicPromptCache, checkForForcedToolUsage, createReadableStreamFromAnthropicStream, } from '@/providers/anthropic/utils' @@ -16,7 +17,6 @@ import { supportsNativeStructuredOutputs, supportsTemperature, } from '@/providers/models' -import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' import { createStreamingExecution } from '@/providers/streaming-execution' import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter' import { enrichLastModelSegment } from '@/providers/trace-enrichment' @@ -327,21 +327,9 @@ export async function executeAnthropicProviderRequest( // Prompt caching: mark the static 
prefix (system + tools) with an ephemeral // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it. - // Must run after the structured-output block above, which assumes `system` is - // still a string. Tools are tagged at their assignment below. - // Gate on the original request system prompt, not payload.system: when there - // are no context/chat messages the system text is relocated into a user - // message and payload.system is blanked (see above), but the prefix is still - // worth caching (the tools, at least). - const cacheStaticPrefix = shouldCacheStaticPrefix({ - systemPrompt: request.systemPrompt, - hasTools: !!anthropicTools?.length, - toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0, - }) - - if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) { - payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }] - } + // Runs after the structured-output block above, which assumes `system` is still + // a string. Mutates payload.system and the last tool in place. + applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt) // Add extended thinking configuration if supported and requested // The 'none' sentinel means "disable thinking" — skip configuration entirely. @@ -385,13 +373,6 @@ export async function executeAnthropicProviderRequest( } if (anthropicTools?.length) { - if (cacheStaticPrefix) { - const lastIndex = anthropicTools.length - 1 - anthropicTools[lastIndex] = { - ...anthropicTools[lastIndex], - cache_control: { type: 'ephemeral' }, - } - } payload.tools = anthropicTools // Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with // thinking. Only auto and none are supported when thinking is enabled. 
diff --git a/apps/sim/providers/anthropic/utils.test.ts b/apps/sim/providers/anthropic/utils.test.ts new file mode 100644 index 0000000000..53cf8eabd2 --- /dev/null +++ b/apps/sim/providers/anthropic/utils.test.ts @@ -0,0 +1,76 @@ +/** + * @vitest-environment node + */ +import type { TextBlockParam, Tool } from '@anthropic-ai/sdk/resources' +import { describe, expect, it } from 'vitest' +import { applyAnthropicPromptCache } from '@/providers/anthropic/utils' + +const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate +const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate + +const tool = (name: string): Tool => ({ + name, + description: 'does a thing', + input_schema: { type: 'object', properties: {} }, +}) + +describe('applyAnthropicPromptCache', () => { + it('converts a large system prompt to a cached text block and tags the last tool', () => { + const payload: { system?: string | TextBlockParam[] } = { system: LARGE } + const tools = [tool('a'), tool('b')] + + applyAnthropicPromptCache(payload, tools, LARGE) + + expect(Array.isArray(payload.system)).toBe(true) + const blocks = payload.system as TextBlockParam[] + expect(blocks).toHaveLength(1) + expect(blocks[0]).toMatchObject({ + type: 'text', + text: LARGE, + cache_control: { type: 'ephemeral' }, + }) + // Only the LAST tool carries the breakpoint; earlier tools are untouched. 
+ expect(tools[0].cache_control).toBeUndefined() + expect(tools[1].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('tags the system block when the system alone is large and there are no tools', () => { + const payload: { system?: string | TextBlockParam[] } = { system: LARGE } + + applyAnthropicPromptCache(payload, undefined, LARGE) + + const blocks = payload.system as TextBlockParam[] + expect(blocks[0].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('tags the tools even when payload.system was relocated/blanked (gate uses the request prompt)', () => { + // No-messages path: the provider moves the system text into a user message + // and blanks payload.system, but the original prompt is large, so the tools + // prefix is still worth caching. + const payload: { system?: string | TextBlockParam[] } = { system: '' } + const tools = [tool('a')] + + applyAnthropicPromptCache(payload, tools, LARGE) + + expect(payload.system).toBe('') // empty system is never converted + expect(tools[0].cache_control).toEqual({ type: 'ephemeral' }) + }) + + it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => { + const payload: { system?: string | TextBlockParam[] } = { system: SMALL } + + applyAnthropicPromptCache(payload, undefined, SMALL) + + expect(payload.system).toBe(SMALL) + }) + + it('does nothing when the combined prefix is below the threshold', () => { + const payload: { system?: string | TextBlockParam[] } = { system: SMALL } + const tools = [tool('a')] + + applyAnthropicPromptCache(payload, tools, SMALL) + + expect(payload.system).toBe(SMALL) + expect(tools[0].cache_control).toBeUndefined() + }) +}) diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts index b9b001bb7a..a4384c8bfa 100644 --- a/apps/sim/providers/anthropic/utils.ts +++ b/apps/sim/providers/anthropic/utils.ts @@ -2,14 +2,57 @@ import type { RawMessageDeltaEvent, RawMessageStartEvent, RawMessageStreamEvent, + 
TextBlockParam, + Tool, Usage, } from '@anthropic-ai/sdk/resources' import { createLogger } from '@sim/logger' import { randomFloat } from '@sim/utils/random' +import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' import { trackForcedToolUsage } from '@/providers/utils' const logger = createLogger('AnthropicUtils') +/** Mutable view of the parts of the Anthropic payload that carry cache breakpoints. */ +interface AnthropicCacheablePayload { + system?: string | Array<TextBlockParam> +} + +/** + * Marks the static request prefix (system prompt + tools) with an ephemeral + * cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so + * repeated calls reuse the cached prefix. Mutates `payload.system` (string → a + * single cached text block) and the last entry of `tools` in place. + * + * `systemPrompt` is the ORIGINAL request system prompt, used only for the + * worthiness gate: on the no-messages path the provider relocates the system + * text into a user message and blanks `payload.system`, but the tools prefix is + * still worth caching there. + */ +export function applyAnthropicPromptCache( + payload: AnthropicCacheablePayload, + tools: Tool[] | undefined, + systemPrompt: string | null | undefined +): void { + const shouldCache = shouldCacheStaticPrefix({ + systemPrompt, + hasTools: !!tools?.length, + toolsApproxChars: tools ? 
JSON.stringify(tools).length : 0, + }) + if (!shouldCache) { + return + } + + if (typeof payload.system === 'string' && payload.system.length > 0) { + payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }] + } + + if (tools?.length) { + const lastIndex = tools.length - 1 + tools[lastIndex] = { ...tools[lastIndex], cache_control: { type: 'ephemeral' } } + } +} + export interface AnthropicStreamUsage { input_tokens: number output_tokens: number diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts index 18e59f6b81..7d4590ebb4 100644 --- a/apps/sim/providers/prompt-cache.test.ts +++ b/apps/sim/providers/prompt-cache.test.ts @@ -1,24 +1,13 @@ /** * @vitest-environment node */ -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import { describe, expect, it } from 'vitest' import { shouldCacheStaticPrefix } from '@/providers/prompt-cache' const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate describe('shouldCacheStaticPrefix', () => { - // vi.stubEnv cleanly sets/restores the kill switch without `delete` (which - // biome rewrites) or assigning `undefined` (which coerces to the string - // "undefined" and leaks to other tests in the worker). 
- beforeEach(() => { - vi.stubEnv('PROMPT_CACHE_DISABLED', '') - }) - - afterEach(() => { - vi.unstubAllEnvs() - }) - it('caches a large system prompt that has tools (agent loop)', () => { expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(true) }) @@ -51,9 +40,4 @@ describe('shouldCacheStaticPrefix', () => { ).toBe(false) expect(shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })).toBe(false) }) - - it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => { - vi.stubEnv('PROMPT_CACHE_DISABLED', 'true') - expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false) - }) }) diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts index 21375c39ca..e741ecbfa6 100644 --- a/apps/sim/providers/prompt-cache.ts +++ b/apps/sim/providers/prompt-cache.ts @@ -1,5 +1,3 @@ -import { getEnv, isTruthy } from '@/lib/core/config/env' - /** * Minimum estimated static-prefix size (system + tool definitions) before it is * worth marking a prompt-cache breakpoint. This is a rough lower bound across @@ -23,18 +21,12 @@ function estimateTokens(text: string): number { * a large system prompt is typically reused across runs within the cache TTL. * A small, tool-less prompt is intentionally skipped so a one-shot call never * pays the cache-write surcharge for a prefix that is never read back. - * - * Set `PROMPT_CACHE_DISABLED=true` to turn this off globally (kill switch). */ export function shouldCacheStaticPrefix(params: { systemPrompt: string | null | undefined hasTools: boolean toolsApproxChars?: number }): boolean { - if (isTruthy(getEnv('PROMPT_CACHE_DISABLED'))) { - return false - } - const system = params.systemPrompt ?? 
'' if (!system) { return false From 38140c7f2242ecb82202f5f2dbd487b3c8f58b38 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 15:21:29 -0700 Subject: [PATCH 4/6] fix(providers): size prompt-cache gate on the larger of payload.system and request prompt Gate on max(final payload.system, request.systemPrompt) so caching fires both when the no-messages path blanks payload.system (size via the request prompt) and when prompt-based structured output appends a large schema to payload.system (size via the final system string). Add a test for the schema-appended case. Caught by Cursor Bugbot. --- apps/sim/providers/anthropic/utils.test.ts | 11 +++++++++++ apps/sim/providers/anthropic/utils.ts | 14 +++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/apps/sim/providers/anthropic/utils.test.ts b/apps/sim/providers/anthropic/utils.test.ts index 53cf8eabd2..5ef24c000d 100644 --- a/apps/sim/providers/anthropic/utils.test.ts +++ b/apps/sim/providers/anthropic/utils.test.ts @@ -56,6 +56,17 @@ describe('applyAnthropicPromptCache', () => { expect(tools[0].cache_control).toEqual({ type: 'ephemeral' }) }) + it('caches when payload.system is large from appended schema text even if the request prompt is small', () => { + // Prompt-based structured output appends a large schema to payload.system, + // so the cacheable system block is large even though request.systemPrompt is small. 
+ const payload: { system?: string | TextBlockParam[] } = { system: LARGE } + + applyAnthropicPromptCache(payload, undefined, SMALL) + + expect(Array.isArray(payload.system)).toBe(true) + expect((payload.system as TextBlockParam[])[0].cache_control).toEqual({ type: 'ephemeral' }) + }) + it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => { const payload: { system?: string | TextBlockParam[] } = { system: SMALL } diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts index a4384c8bfa..e649b5d6bf 100644 --- a/apps/sim/providers/anthropic/utils.ts +++ b/apps/sim/providers/anthropic/utils.ts @@ -34,8 +34,16 @@ export function applyAnthropicPromptCache( tools: Tool[] | undefined, systemPrompt: string | null | undefined ): void { + const payloadSystem = typeof payload.system === 'string' ? payload.system : '' + + // Size the gate on the LARGER of the final payload.system (which may include + // appended structured-output schema text) and the original request prompt + // (non-empty even when the no-messages path relocates it out of payload.system). + const gateSystem = + payloadSystem.length >= (systemPrompt?.length ?? 0) ? payloadSystem : systemPrompt + const shouldCache = shouldCacheStaticPrefix({ - systemPrompt, + systemPrompt: gateSystem, hasTools: !!tools?.length, toolsApproxChars: tools ? 
JSON.stringify(tools).length : 0, }) @@ -43,8 +51,8 @@ export function applyAnthropicPromptCache( return } - if (typeof payload.system === 'string' && payload.system.length > 0) { - payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }] + if (payloadSystem.length > 0) { + payload.system = [{ type: 'text', text: payloadSystem, cache_control: { type: 'ephemeral' } }] } if (tools?.length) { From 5e906311d5724018c60e8660b2e12271975262e4 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 15:35:16 -0700 Subject: [PATCH 5/6] docs(providers): move prompt-cache inline comments into TSDoc Drop the inline // comments in favor of TSDoc on the helper/gate. The gate-sizing and call-ordering rationale now lives in applyAnthropicPromptCache's TSDoc; no behavior change. --- apps/sim/providers/anthropic/core.ts | 4 ---- apps/sim/providers/anthropic/utils.ts | 20 ++++++++++++-------- apps/sim/providers/prompt-cache.ts | 3 --- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts index 86b1247727..41d7184afe 100644 --- a/apps/sim/providers/anthropic/core.ts +++ b/apps/sim/providers/anthropic/core.ts @@ -325,10 +325,6 @@ export async function executeAnthropicProviderRequest( } } - // Prompt caching: mark the static prefix (system + tools) with an ephemeral - // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it. - // Runs after the structured-output block above, which assumes `system` is still - // a string. Mutates payload.system and the last tool in place. 
applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt) // Add extended thinking configuration if supported and requested diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts index e649b5d6bf..230091bdcc 100644 --- a/apps/sim/providers/anthropic/utils.ts +++ b/apps/sim/providers/anthropic/utils.ts @@ -22,12 +22,19 @@ interface AnthropicCacheablePayload { * Marks the static request prefix (system prompt + tools) with an ephemeral * cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so * repeated calls reuse the cached prefix. Mutates `payload.system` (string → a - * single cached text block) and the last entry of `tools` in place. + * single cached text block) and the last entry of `tools` in place; a no-op when + * the prefix is too small or not present. Call after any structured-output + * mutation of `payload.system`, since it may replace the string with a block array. * - * `systemPrompt` is the ORIGINAL request system prompt, used only for the - * worthiness gate: on the no-messages path the provider relocates the system - * text into a user message and blanks `payload.system`, but the tools prefix is - * still worth caching there. + * The worthiness gate is sized on the LARGER of the final `payload.system` + * (which may include appended structured-output schema text) and the original + * `systemPrompt` (non-empty even when the no-messages path relocates the system + * text into a user message and blanks `payload.system` — the tools prefix is + * still worth caching there). + * + * @param payload - Anthropic request payload; `system` is mutated in place. + * @param tools - Anthropic tool definitions; the last entry is mutated in place. + * @param systemPrompt - The original request system prompt, used only for sizing. 
*/ export function applyAnthropicPromptCache( payload: AnthropicCacheablePayload, @@ -36,9 +43,6 @@ export function applyAnthropicPromptCache( ): void { const payloadSystem = typeof payload.system === 'string' ? payload.system : '' - // Size the gate on the LARGER of the final payload.system (which may include - // appended structured-output schema text) and the original request prompt - // (non-empty even when the no-messages path relocates it out of payload.system). const gateSystem = payloadSystem.length >= (systemPrompt?.length ?? 0) ? payloadSystem : systemPrompt diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts index e741ecbfa6..036467c5e1 100644 --- a/apps/sim/providers/prompt-cache.ts +++ b/apps/sim/providers/prompt-cache.ts @@ -40,8 +40,5 @@ export function shouldCacheStaticPrefix(params: { return false } - // Tools imply an agent loop (the prefix is re-read each iteration). Without - // tools, only cache when the system prompt alone is large enough to be worth - // the write on its own. return params.hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS } From b9a453d0263b4875edc989789b6d7fc20d82e389 Mon Sep 17 00:00:00 2001 From: waleed Date: Tue, 16 Jun 2026 15:59:48 -0700 Subject: [PATCH 6/6] test(providers): add request-capture test for Anthropic prompt caching Drives the real executeAnthropicProviderRequest down the streaming path with only the client injected via the createClient seam (real models/utils/attachments), and asserts the request payload handed to messages.create carries a cache_control-tagged system block for a large prompt and a plain string for a small one. Closes the end-to-end wiring gap (AI-SDK-style request-body capture). 
--- apps/sim/providers/anthropic/core.test.ts | 66 +++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 apps/sim/providers/anthropic/core.test.ts diff --git a/apps/sim/providers/anthropic/core.test.ts b/apps/sim/providers/anthropic/core.test.ts new file mode 100644 index 0000000000..c4cb6afd9b --- /dev/null +++ b/apps/sim/providers/anthropic/core.test.ts @@ -0,0 +1,66 @@ +/** + * @vitest-environment node + */ +import type Anthropic from '@anthropic-ai/sdk' +import type { TextBlockParam } from '@anthropic-ai/sdk/resources' +import { describe, expect, it, vi } from 'vitest' +import { executeAnthropicProviderRequest } from '@/providers/anthropic/core' +import type { ProviderRequest } from '@/providers/types' + +const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate +const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate + +/** + * Drives the real `executeAnthropicProviderRequest` down the streaming/no-tools + * path and captures the request payload handed to `messages.create`, injecting + * only the client via the `createClient` seam (real models/utils/attachments run). + * The streaming path builds its stream lazily, so an empty async iterable suffices. 
+ */ +async function captureRequestPayload( + request: Partial<ProviderRequest> +): Promise<Record<string, unknown>> { + let captured: Record<string, unknown> = {} + const fakeClient = { + messages: { + create: vi.fn(async (payload: Record<string, unknown>) => { + captured = payload + return (async function* () {})() + }), + }, + } as unknown as Anthropic + + await executeAnthropicProviderRequest( + { + model: 'claude-sonnet-4-6', + messages: [{ role: 'user', content: 'hi' }], + apiKey: 'test-key', + stream: true, + ...request, + } as ProviderRequest, + { + providerId: 'anthropic', + providerLabel: 'Anthropic', + logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() } as never, + createClient: () => fakeClient, + } + ) + + return captured +} + +describe('executeAnthropicProviderRequest prompt caching (request capture)', () => { + it('emits a cache_control-tagged system block for a large system prompt', async () => { + const payload = await captureRequestPayload({ systemPrompt: LARGE }) + + expect(Array.isArray(payload.system)).toBe(true) + const blocks = payload.system as TextBlockParam[] + expect(blocks[0]).toMatchObject({ type: 'text', cache_control: { type: 'ephemeral' } }) + }) + + it('leaves a small system prompt as a plain string (no cache_control)', async () => { + const payload = await captureRequestPayload({ systemPrompt: SMALL }) + + expect(typeof payload.system).toBe('string') + expect(payload.system).toBe(SMALL) + }) +})