From ebe5447d0fac27e3b5b12c97b37fbf9494103df3 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 14:12:08 -0700
Subject: [PATCH 1/6] feat(providers): prompt caching for Anthropic +
 Azure-Anthropic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mark the static request prefix (system prompt + tools) with an ephemeral
cache_control breakpoint so repeated calls — agent tool-loops and multi-turn —
reuse the cached prefix (~90% cheaper cached input + lower latency). Azure-
Anthropic inherits this via the shared core.

- New providers/prompt-cache.ts gate: only caches when the static prefix is
  large enough to be cacheable AND likely reused (tools present, or a large
  system prompt), so a one-shot tool-less call never pays the cache-write
  surcharge. Kill switch: PROMPT_CACHE_DISABLED=true.
- anthropic/core.ts: convert system string -> a cached text block (after the
  structured-output concat, which assumes a string) and tag the last tool. Uses
  2 of Anthropic's 4 breakpoints; the tool-loop reuses the tagged payload.
- Outputs are unchanged; cost accounting already reads cache_read/creation
  tokens (buildAnthropicSegmentTokens), so usage stays accurate.

Matches the AI SDK / LangChain / Spring AI convention (explicit breakpoints for
Claude; automatic for OpenAI/Gemini). Bedrock + OpenRouter to follow (they need
cache-token accounting alongside).
---
 apps/sim/providers/anthropic/core.ts    | 22 ++++++++++
 apps/sim/providers/prompt-cache.test.ts | 58 +++++++++++++++++++++++++
 apps/sim/providers/prompt-cache.ts      | 55 +++++++++++++++++++++++
 3 files changed, 135 insertions(+)
 create mode 100644 apps/sim/providers/prompt-cache.test.ts
 create mode 100644 apps/sim/providers/prompt-cache.ts

diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts
index 57056e6acc..292eb132b6 100644
--- a/apps/sim/providers/anthropic/core.ts
+++ b/apps/sim/providers/anthropic/core.ts
@@ -16,6 +16,7 @@ import {
   supportsNativeStructuredOutputs,
   supportsTemperature,
 } from '@/providers/models'
+import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
 import { createStreamingExecution } from '@/providers/streaming-execution'
 import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter'
 import { enrichLastModelSegment } from '@/providers/trace-enrichment'
@@ -324,6 +325,20 @@ export async function executeAnthropicProviderRequest(
     }
   }
 
+  // Prompt caching: mark the static prefix (system + tools) with an ephemeral
+  // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
+  // Must run after the structured-output block above, which assumes `system` is
+  // still a string. Tools are tagged at their assignment below.
+  const cacheStaticPrefix = shouldCacheStaticPrefix({
+    systemPrompt: typeof payload.system === 'string' ? payload.system : '',
+    hasTools: !!anthropicTools?.length,
+    toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0,
+  })
+
+  if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) {
+    payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
+  }
+
   // Add extended thinking configuration if supported and requested
   // The 'none' sentinel means "disable thinking" — skip configuration entirely.
   if (request.thinkingLevel && request.thinkingLevel !== 'none') {
@@ -366,6 +381,13 @@ export async function executeAnthropicProviderRequest(
   }
 
   if (anthropicTools?.length) {
+    if (cacheStaticPrefix) {
+      const lastIndex = anthropicTools.length - 1
+      anthropicTools[lastIndex] = {
+        ...anthropicTools[lastIndex],
+        cache_control: { type: 'ephemeral' },
+      }
+    }
     payload.tools = anthropicTools
     // Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with
     // thinking. Only auto and none are supported when thinking is enabled.
diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts
new file mode 100644
index 0000000000..8999a2a252
--- /dev/null
+++ b/apps/sim/providers/prompt-cache.test.ts
@@ -0,0 +1,58 @@
+/**
+ * @vitest-environment node
+ */
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
+
+const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
+const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
+
+describe('shouldCacheStaticPrefix', () => {
+  const original = process.env.PROMPT_CACHE_DISABLED
+
+  beforeEach(() => {
+    process.env.PROMPT_CACHE_DISABLED = undefined
+  })
+
+  afterEach(() => {
+    process.env.PROMPT_CACHE_DISABLED = original
+  })
+
+  it('caches a large system prompt that has tools (agent loop)', () => {
+    expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(true)
+  })
+
+  it('caches a large system prompt even without tools', () => {
+    expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: false })).toBe(true)
+  })
+
+  it('reaches the threshold via tools when the system prompt alone is below it', () => {
+    // Small system + large serialized tools clears the combined threshold, and
+    // tools imply reuse, so it should cache.
+    expect(
+      shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: true, toolsApproxChars: 8_000 })
+    ).toBe(true)
+  })
+
+  it('does NOT cache a small, tool-less prompt (one-shot write surcharge avoided)', () => {
+    expect(shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: false })).toBe(false)
+  })
+
+  it('does NOT cache a small system even with tools when the combined prefix is below threshold', () => {
+    expect(
+      shouldCacheStaticPrefix({ systemPrompt: SMALL, hasTools: true, toolsApproxChars: 400 })
+    ).toBe(false)
+  })
+
+  it('does NOT cache when there is no system prompt', () => {
+    expect(
+      shouldCacheStaticPrefix({ systemPrompt: '', hasTools: true, toolsApproxChars: 8_000 })
+    ).toBe(false)
+    expect(shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })).toBe(false)
+  })
+
+  it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => {
+    process.env.PROMPT_CACHE_DISABLED = 'true'
+    expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false)
+  })
+})
diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts
new file mode 100644
index 0000000000..21375c39ca
--- /dev/null
+++ b/apps/sim/providers/prompt-cache.ts
@@ -0,0 +1,55 @@
+import { getEnv, isTruthy } from '@/lib/core/config/env'
+
+/**
+ * Minimum estimated static-prefix size (system + tool definitions) before it is
+ * worth marking a prompt-cache breakpoint. This is a rough lower bound across
+ * Claude models (some require more); below it, providers silently skip caching
+ * anyway, so this only avoids spending a breakpoint on a trivially small prefix.
+ */
+const MIN_CACHEABLE_PREFIX_TOKENS = 1024
+
+/** Rough token estimate (~4 chars/token) — fast and good enough for a gate. */
+function estimateTokens(text: string): number {
+  return Math.ceil(text.length / 4)
+}
+
+/**
+ * Decides whether to inject prompt-cache breakpoints on the static prefix
+ * (system prompt + tool definitions) for providers that require explicit cache
+ * control (Anthropic, Bedrock, and Anthropic models via OpenRouter).
+ *
+ * Caching only pays off when the prefix is large enough to be cacheable AND is
+ * actually re-read: agent tool-loops re-send the prefix on every iteration, and
+ * a large system prompt is typically reused across runs within the cache TTL.
+ * A small, tool-less prompt is intentionally skipped so a one-shot call never
+ * pays the cache-write surcharge for a prefix that is never read back.
+ *
+ * Set `PROMPT_CACHE_DISABLED=true` to turn this off globally (kill switch).
+ */
+export function shouldCacheStaticPrefix(params: {
+  systemPrompt: string | null | undefined
+  hasTools: boolean
+  toolsApproxChars?: number
+}): boolean {
+  if (isTruthy(getEnv('PROMPT_CACHE_DISABLED'))) {
+    return false
+  }
+
+  const system = params.systemPrompt ?? ''
+  if (!system) {
+    return false
+  }
+
+  const systemTokens = estimateTokens(system)
+  const toolTokens = params.toolsApproxChars ? Math.ceil(params.toolsApproxChars / 4) : 0
+  const prefixTokens = systemTokens + toolTokens
+
+  if (prefixTokens < MIN_CACHEABLE_PREFIX_TOKENS) {
+    return false
+  }
+
+  // Tools imply an agent loop (the prefix is re-read each iteration). Without
+  // tools, only cache when the system prompt alone is large enough to be worth
+  // the write on its own.
+  return params.hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS
+}

From 3a449361f1228fc5e11ff8a304cbdeccec0aeccf Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 14:55:30 -0700
Subject: [PATCH 2/6] fix(providers): cache gate uses request system prompt;
 test uses vi.stubEnv

- anthropic/core.ts: gate on request.systemPrompt instead of payload.system, so
  the no-messages path (where the system text is relocated into a user message
  and payload.system is blanked) still caches the tools prefix. (Cursor review)
- prompt-cache.test.ts: manage the kill-switch env via vi.stubEnv/unstubAllEnvs
  instead of assigning undefined (which coerces to the string "undefined" and
  leaks to other tests in the same worker). Addresses the Greptile finding while
  satisfying biome's noDelete rule.
---
 apps/sim/providers/anthropic/core.ts    |  6 +++++-
 apps/sim/providers/prompt-cache.test.ts | 13 +++++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts
index 292eb132b6..717b9f93dc 100644
--- a/apps/sim/providers/anthropic/core.ts
+++ b/apps/sim/providers/anthropic/core.ts
@@ -329,8 +329,12 @@ export async function executeAnthropicProviderRequest(
   // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
   // Must run after the structured-output block above, which assumes `system` is
   // still a string. Tools are tagged at their assignment below.
+  // Gate on the original request system prompt, not payload.system: when there
+  // are no context/chat messages the system text is relocated into a user
+  // message and payload.system is blanked (see above), but the prefix is still
+  // worth caching (the tools, at least).
   const cacheStaticPrefix = shouldCacheStaticPrefix({
-    systemPrompt: typeof payload.system === 'string' ? payload.system : '',
+    systemPrompt: request.systemPrompt,
     hasTools: !!anthropicTools?.length,
     toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0,
   })
diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts
index 8999a2a252..18e59f6b81 100644
--- a/apps/sim/providers/prompt-cache.test.ts
+++ b/apps/sim/providers/prompt-cache.test.ts
@@ -1,21 +1,22 @@
 /**
  * @vitest-environment node
  */
-import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
 import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
 
 const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
 const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
 
 describe('shouldCacheStaticPrefix', () => {
-  const original = process.env.PROMPT_CACHE_DISABLED
-
+  // vi.stubEnv cleanly sets/restores the kill switch without `delete` (which
+  // biome rewrites) or assigning `undefined` (which coerces to the string
+  // "undefined" and leaks to other tests in the worker).
   beforeEach(() => {
-    process.env.PROMPT_CACHE_DISABLED = undefined
+    vi.stubEnv('PROMPT_CACHE_DISABLED', '')
   })
 
   afterEach(() => {
-    process.env.PROMPT_CACHE_DISABLED = original
+    vi.unstubAllEnvs()
   })
 
   it('caches a large system prompt that has tools (agent loop)', () => {
@@ -52,7 +53,7 @@ describe('shouldCacheStaticPrefix', () => {
   })
 
   it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => {
-    process.env.PROMPT_CACHE_DISABLED = 'true'
+    vi.stubEnv('PROMPT_CACHE_DISABLED', 'true')
     expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false)
   })
 })

From 3855e0424874cc958dae52f30e366a0404c1a7e9 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 15:13:52 -0700
Subject: [PATCH 3/6] refactor(providers): always-on prompt caching via a
 directly-tested helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove the PROMPT_CACHE_DISABLED kill switch — prompt caching is always on.
- Extract the Anthropic tagging into applyAnthropicPromptCache(payload, tools,
  systemPrompt) in anthropic/utils.ts: one place that gates and mutates the
  system block + last tool, replacing the two inline blocks in core.ts.
- Add direct unit tests for the helper (system→cached block, last-tool tagged,
  relocated/blanked-system still tags tools, below-threshold and tool-less cases
  untouched) so the actual payload mutation is verified, not just the gate.

No behavior change to outputs; verified on vitest 4.1.8 (CI's version).
---
 apps/sim/providers/anthropic/core.ts       | 27 ++------
 apps/sim/providers/anthropic/utils.test.ts | 76 ++++++++++++++++++++++
 apps/sim/providers/anthropic/utils.ts      | 43 ++++++++++++
 apps/sim/providers/prompt-cache.test.ts    | 18 +----
 apps/sim/providers/prompt-cache.ts         |  8 ---
 5 files changed, 124 insertions(+), 48 deletions(-)
 create mode 100644 apps/sim/providers/anthropic/utils.test.ts

diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts
index 717b9f93dc..86b1247727 100644
--- a/apps/sim/providers/anthropic/core.ts
+++ b/apps/sim/providers/anthropic/core.ts
@@ -6,6 +6,7 @@ import { getErrorMessage, toError } from '@sim/utils/errors'
 import type { BlockTokens, IterationToolCall, StreamingExecution } from '@/executor/types'
 import { MAX_TOOL_ITERATIONS } from '@/providers'
 import {
+  applyAnthropicPromptCache,
   checkForForcedToolUsage,
   createReadableStreamFromAnthropicStream,
 } from '@/providers/anthropic/utils'
@@ -16,7 +17,6 @@ import {
   supportsNativeStructuredOutputs,
   supportsTemperature,
 } from '@/providers/models'
-import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
 import { createStreamingExecution } from '@/providers/streaming-execution'
 import { adaptAnthropicToolSchema } from '@/providers/tool-schema-adapter'
 import { enrichLastModelSegment } from '@/providers/trace-enrichment'
@@ -327,21 +327,9 @@ export async function executeAnthropicProviderRequest(
 
   // Prompt caching: mark the static prefix (system + tools) with an ephemeral
   // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
-  // Must run after the structured-output block above, which assumes `system` is
-  // still a string. Tools are tagged at their assignment below.
-  // Gate on the original request system prompt, not payload.system: when there
-  // are no context/chat messages the system text is relocated into a user
-  // message and payload.system is blanked (see above), but the prefix is still
-  // worth caching (the tools, at least).
-  const cacheStaticPrefix = shouldCacheStaticPrefix({
-    systemPrompt: request.systemPrompt,
-    hasTools: !!anthropicTools?.length,
-    toolsApproxChars: anthropicTools ? JSON.stringify(anthropicTools).length : 0,
-  })
-
-  if (cacheStaticPrefix && typeof payload.system === 'string' && payload.system.length > 0) {
-    payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
-  }
+  // Runs after the structured-output block above, which assumes `system` is still
+  // a string. Mutates payload.system and the last tool in place.
+  applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt)
 
   // Add extended thinking configuration if supported and requested
   // The 'none' sentinel means "disable thinking" — skip configuration entirely.
@@ -385,13 +373,6 @@ export async function executeAnthropicProviderRequest(
   }
 
   if (anthropicTools?.length) {
-    if (cacheStaticPrefix) {
-      const lastIndex = anthropicTools.length - 1
-      anthropicTools[lastIndex] = {
-        ...anthropicTools[lastIndex],
-        cache_control: { type: 'ephemeral' },
-      }
-    }
     payload.tools = anthropicTools
     // Per Anthropic docs: forced tool_choice (type: "tool" or "any") is incompatible with
     // thinking. Only auto and none are supported when thinking is enabled.
diff --git a/apps/sim/providers/anthropic/utils.test.ts b/apps/sim/providers/anthropic/utils.test.ts
new file mode 100644
index 0000000000..53cf8eabd2
--- /dev/null
+++ b/apps/sim/providers/anthropic/utils.test.ts
@@ -0,0 +1,76 @@
+/**
+ * @vitest-environment node
+ */
+import type { TextBlockParam, Tool } from '@anthropic-ai/sdk/resources'
+import { describe, expect, it } from 'vitest'
+import { applyAnthropicPromptCache } from '@/providers/anthropic/utils'
+
+const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
+const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
+
+const tool = (name: string): Tool => ({
+  name,
+  description: 'does a thing',
+  input_schema: { type: 'object', properties: {} },
+})
+
+describe('applyAnthropicPromptCache', () => {
+  it('converts a large system prompt to a cached text block and tags the last tool', () => {
+    const payload: { system?: string | TextBlockParam[] } = { system: LARGE }
+    const tools = [tool('a'), tool('b')]
+
+    applyAnthropicPromptCache(payload, tools, LARGE)
+
+    expect(Array.isArray(payload.system)).toBe(true)
+    const blocks = payload.system as TextBlockParam[]
+    expect(blocks).toHaveLength(1)
+    expect(blocks[0]).toMatchObject({
+      type: 'text',
+      text: LARGE,
+      cache_control: { type: 'ephemeral' },
+    })
+    // Only the LAST tool carries the breakpoint; earlier tools are untouched.
+    expect(tools[0].cache_control).toBeUndefined()
+    expect(tools[1].cache_control).toEqual({ type: 'ephemeral' })
+  })
+
+  it('tags the system block when the system alone is large and there are no tools', () => {
+    const payload: { system?: string | TextBlockParam[] } = { system: LARGE }
+
+    applyAnthropicPromptCache(payload, undefined, LARGE)
+
+    const blocks = payload.system as TextBlockParam[]
+    expect(blocks[0].cache_control).toEqual({ type: 'ephemeral' })
+  })
+
+  it('tags the tools even when payload.system was relocated/blanked (gate uses the request prompt)', () => {
+    // No-messages path: the provider moves the system text into a user message
+    // and blanks payload.system, but the original prompt is large, so the tools
+    // prefix is still worth caching.
+    const payload: { system?: string | TextBlockParam[] } = { system: '' }
+    const tools = [tool('a')]
+
+    applyAnthropicPromptCache(payload, tools, LARGE)
+
+    expect(payload.system).toBe('') // empty system is never converted
+    expect(tools[0].cache_control).toEqual({ type: 'ephemeral' })
+  })
+
+  it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => {
+    const payload: { system?: string | TextBlockParam[] } = { system: SMALL }
+
+    applyAnthropicPromptCache(payload, undefined, SMALL)
+
+    expect(payload.system).toBe(SMALL)
+  })
+
+  it('does nothing when the combined prefix is below the threshold', () => {
+    const payload: { system?: string | TextBlockParam[] } = { system: SMALL }
+    const tools = [tool('a')]
+
+    applyAnthropicPromptCache(payload, tools, SMALL)
+
+    expect(payload.system).toBe(SMALL)
+    expect(tools[0].cache_control).toBeUndefined()
+  })
+})
diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts
index b9b001bb7a..a4384c8bfa 100644
--- a/apps/sim/providers/anthropic/utils.ts
+++ b/apps/sim/providers/anthropic/utils.ts
@@ -2,14 +2,57 @@ import type {
   RawMessageDeltaEvent,
   RawMessageStartEvent,
   RawMessageStreamEvent,
+  TextBlockParam,
+  Tool,
   Usage,
 } from '@anthropic-ai/sdk/resources'
 import { createLogger } from '@sim/logger'
 import { randomFloat } from '@sim/utils/random'
+import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
 import { trackForcedToolUsage } from '@/providers/utils'
 
 const logger = createLogger('AnthropicUtils')
 
+/** Mutable view of the parts of the Anthropic payload that carry cache breakpoints. */
+interface AnthropicCacheablePayload {
+  system?: string | Array<TextBlockParam>
+}
+
+/**
+ * Marks the static request prefix (system prompt + tools) with an ephemeral
+ * cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so
+ * repeated calls reuse the cached prefix. Mutates `payload.system` (string → a
+ * single cached text block) and the last entry of `tools` in place.
+ *
+ * `systemPrompt` is the ORIGINAL request system prompt, used only for the
+ * worthiness gate: on the no-messages path the provider relocates the system
+ * text into a user message and blanks `payload.system`, but the tools prefix is
+ * still worth caching there.
+ */
+export function applyAnthropicPromptCache(
+  payload: AnthropicCacheablePayload,
+  tools: Tool[] | undefined,
+  systemPrompt: string | null | undefined
+): void {
+  const shouldCache = shouldCacheStaticPrefix({
+    systemPrompt,
+    hasTools: !!tools?.length,
+    toolsApproxChars: tools ? JSON.stringify(tools).length : 0,
+  })
+  if (!shouldCache) {
+    return
+  }
+
+  if (typeof payload.system === 'string' && payload.system.length > 0) {
+    payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
+  }
+
+  if (tools?.length) {
+    const lastIndex = tools.length - 1
+    tools[lastIndex] = { ...tools[lastIndex], cache_control: { type: 'ephemeral' } }
+  }
+}
+
 export interface AnthropicStreamUsage {
   input_tokens: number
   output_tokens: number
diff --git a/apps/sim/providers/prompt-cache.test.ts b/apps/sim/providers/prompt-cache.test.ts
index 18e59f6b81..7d4590ebb4 100644
--- a/apps/sim/providers/prompt-cache.test.ts
+++ b/apps/sim/providers/prompt-cache.test.ts
@@ -1,24 +1,13 @@
 /**
  * @vitest-environment node
  */
-import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
+import { describe, expect, it } from 'vitest'
 import { shouldCacheStaticPrefix } from '@/providers/prompt-cache'
 
 const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024 gate
 const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the gate
 
 describe('shouldCacheStaticPrefix', () => {
-  // vi.stubEnv cleanly sets/restores the kill switch without `delete` (which
-  // biome rewrites) or assigning `undefined` (which coerces to the string
-  // "undefined" and leaks to other tests in the worker).
-  beforeEach(() => {
-    vi.stubEnv('PROMPT_CACHE_DISABLED', '')
-  })
-
-  afterEach(() => {
-    vi.unstubAllEnvs()
-  })
-
   it('caches a large system prompt that has tools (agent loop)', () => {
     expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(true)
   })
@@ -51,9 +40,4 @@ describe('shouldCacheStaticPrefix', () => {
     ).toBe(false)
     expect(shouldCacheStaticPrefix({ systemPrompt: null, hasTools: true })).toBe(false)
   })
-
-  it('is disabled by the PROMPT_CACHE_DISABLED kill switch', () => {
-    vi.stubEnv('PROMPT_CACHE_DISABLED', 'true')
-    expect(shouldCacheStaticPrefix({ systemPrompt: LARGE, hasTools: true })).toBe(false)
-  })
 })
diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts
index 21375c39ca..e741ecbfa6 100644
--- a/apps/sim/providers/prompt-cache.ts
+++ b/apps/sim/providers/prompt-cache.ts
@@ -1,5 +1,3 @@
-import { getEnv, isTruthy } from '@/lib/core/config/env'
-
 /**
  * Minimum estimated static-prefix size (system + tool definitions) before it is
  * worth marking a prompt-cache breakpoint. This is a rough lower bound across
@@ -23,18 +21,12 @@ function estimateTokens(text: string): number {
  * a large system prompt is typically reused across runs within the cache TTL.
  * A small, tool-less prompt is intentionally skipped so a one-shot call never
  * pays the cache-write surcharge for a prefix that is never read back.
- *
- * Set `PROMPT_CACHE_DISABLED=true` to turn this off globally (kill switch).
  */
 export function shouldCacheStaticPrefix(params: {
   systemPrompt: string | null | undefined
   hasTools: boolean
   toolsApproxChars?: number
 }): boolean {
-  if (isTruthy(getEnv('PROMPT_CACHE_DISABLED'))) {
-    return false
-  }
-
   const system = params.systemPrompt ?? ''
   if (!system) {
     return false

From 38140c7f2242ecb82202f5f2dbd487b3c8f58b38 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 15:21:29 -0700
Subject: [PATCH 4/6] fix(providers): size prompt-cache gate on the larger of
 payload.system and request prompt

Size the gate on whichever of the final payload.system and request.systemPrompt
is longer, so caching fires both when the no-messages path blanks payload.system
(sized via the request prompt) and when prompt-based structured output appends a
large schema to payload.system (sized via the final system string). Add a test
for the schema-appended case.

Caught by Cursor Bugbot.
---
 apps/sim/providers/anthropic/utils.test.ts | 11 +++++++++++
 apps/sim/providers/anthropic/utils.ts      | 14 +++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/apps/sim/providers/anthropic/utils.test.ts b/apps/sim/providers/anthropic/utils.test.ts
index 53cf8eabd2..5ef24c000d 100644
--- a/apps/sim/providers/anthropic/utils.test.ts
+++ b/apps/sim/providers/anthropic/utils.test.ts
@@ -56,6 +56,17 @@ describe('applyAnthropicPromptCache', () => {
     expect(tools[0].cache_control).toEqual({ type: 'ephemeral' })
   })
 
+  it('caches when payload.system is large from appended schema text even if the request prompt is small', () => {
+    // Prompt-based structured output appends a large schema to payload.system,
+    // so the cacheable system block is large even though request.systemPrompt is small.
+    const payload: { system?: string | TextBlockParam[] } = { system: LARGE }
+
+    applyAnthropicPromptCache(payload, undefined, SMALL)
+
+    expect(Array.isArray(payload.system)).toBe(true)
+    expect((payload.system as TextBlockParam[])[0].cache_control).toEqual({ type: 'ephemeral' })
+  })
+
   it('leaves a small, tool-less prefix untouched (no write surcharge on one-shot calls)', () => {
     const payload: { system?: string | TextBlockParam[] } = { system: SMALL }
 
diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts
index a4384c8bfa..e649b5d6bf 100644
--- a/apps/sim/providers/anthropic/utils.ts
+++ b/apps/sim/providers/anthropic/utils.ts
@@ -34,8 +34,16 @@ export function applyAnthropicPromptCache(
   tools: Tool[] | undefined,
   systemPrompt: string | null | undefined
 ): void {
+  const payloadSystem = typeof payload.system === 'string' ? payload.system : ''
+
+  // Size the gate on the LARGER of the final payload.system (which may include
+  // appended structured-output schema text) and the original request prompt
+  // (non-empty even when the no-messages path relocates it out of payload.system).
+  const gateSystem =
+    payloadSystem.length >= (systemPrompt?.length ?? 0) ? payloadSystem : systemPrompt
+
   const shouldCache = shouldCacheStaticPrefix({
-    systemPrompt,
+    systemPrompt: gateSystem,
     hasTools: !!tools?.length,
     toolsApproxChars: tools ? JSON.stringify(tools).length : 0,
   })
@@ -43,8 +51,8 @@ export function applyAnthropicPromptCache(
     return
   }
 
-  if (typeof payload.system === 'string' && payload.system.length > 0) {
-    payload.system = [{ type: 'text', text: payload.system, cache_control: { type: 'ephemeral' } }]
+  if (payloadSystem.length > 0) {
+    payload.system = [{ type: 'text', text: payloadSystem, cache_control: { type: 'ephemeral' } }]
   }
 
   if (tools?.length) {

From 5e906311d5724018c60e8660b2e12271975262e4 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 15:35:16 -0700
Subject: [PATCH 5/6] docs(providers): move prompt-cache inline comments into
 TSDoc

Drop the inline // comments in favor of TSDoc on the helper/gate. The gate-sizing
and call-ordering rationale now lives in applyAnthropicPromptCache's TSDoc; no
behavior change.
---
 apps/sim/providers/anthropic/core.ts  |  4 ----
 apps/sim/providers/anthropic/utils.ts | 20 ++++++++++++--------
 apps/sim/providers/prompt-cache.ts    |  3 ---
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/apps/sim/providers/anthropic/core.ts b/apps/sim/providers/anthropic/core.ts
index 86b1247727..41d7184afe 100644
--- a/apps/sim/providers/anthropic/core.ts
+++ b/apps/sim/providers/anthropic/core.ts
@@ -325,10 +325,6 @@ export async function executeAnthropicProviderRequest(
     }
   }
 
-  // Prompt caching: mark the static prefix (system + tools) with an ephemeral
-  // cache breakpoint so repeated calls (agent tool-loops, multi-turn) reuse it.
-  // Runs after the structured-output block above, which assumes `system` is still
-  // a string. Mutates payload.system and the last tool in place.
   applyAnthropicPromptCache(payload, anthropicTools, request.systemPrompt)
 
   // Add extended thinking configuration if supported and requested
diff --git a/apps/sim/providers/anthropic/utils.ts b/apps/sim/providers/anthropic/utils.ts
index e649b5d6bf..230091bdcc 100644
--- a/apps/sim/providers/anthropic/utils.ts
+++ b/apps/sim/providers/anthropic/utils.ts
@@ -22,12 +22,19 @@ interface AnthropicCacheablePayload {
  * Marks the static request prefix (system prompt + tools) with an ephemeral
  * cache breakpoint when {@link shouldCacheStaticPrefix} deems it worthwhile, so
  * repeated calls reuse the cached prefix. Mutates `payload.system` (string → a
- * single cached text block) and the last entry of `tools` in place.
+ * single cached text block) and the last entry of `tools` in place; a no-op when
+ * the prefix is too small or not present. Call after any structured-output
+ * mutation of `payload.system`, since it may replace the string with a block array.
  *
- * `systemPrompt` is the ORIGINAL request system prompt, used only for the
- * worthiness gate: on the no-messages path the provider relocates the system
- * text into a user message and blanks `payload.system`, but the tools prefix is
- * still worth caching there.
+ * The worthiness gate is sized on the LARGER of the final `payload.system`
+ * (which may include appended structured-output schema text) and the original
+ * `systemPrompt` (non-empty even when the no-messages path relocates the system
+ * text into a user message and blanks `payload.system` — the tools prefix is
+ * still worth caching there).
+ *
+ * @param payload - Anthropic request payload; `system` is mutated in place.
+ * @param tools - Anthropic tool definitions; the last entry is mutated in place.
+ * @param systemPrompt - The original request system prompt, used only for sizing.
  */
 export function applyAnthropicPromptCache(
   payload: AnthropicCacheablePayload,
@@ -36,9 +43,6 @@ export function applyAnthropicPromptCache(
 ): void {
   const payloadSystem = typeof payload.system === 'string' ? payload.system : ''
 
-  // Size the gate on the LARGER of the final payload.system (which may include
-  // appended structured-output schema text) and the original request prompt
-  // (non-empty even when the no-messages path relocates it out of payload.system).
   const gateSystem =
     payloadSystem.length >= (systemPrompt?.length ?? 0) ? payloadSystem : systemPrompt
 
diff --git a/apps/sim/providers/prompt-cache.ts b/apps/sim/providers/prompt-cache.ts
index e741ecbfa6..036467c5e1 100644
--- a/apps/sim/providers/prompt-cache.ts
+++ b/apps/sim/providers/prompt-cache.ts
@@ -40,8 +40,5 @@ export function shouldCacheStaticPrefix(params: {
     return false
   }
 
-  // Tools imply an agent loop (the prefix is re-read each iteration). Without
-  // tools, only cache when the system prompt alone is large enough to be worth
-  // the write on its own.
   return params.hasTools || systemTokens >= MIN_CACHEABLE_PREFIX_TOKENS
 }

From b9a453d0263b4875edc989789b6d7fc20d82e389 Mon Sep 17 00:00:00 2001
From: waleed <walif6@gmail.com>
Date: Tue, 16 Jun 2026 15:59:48 -0700
Subject: [PATCH 6/6] test(providers): add request-capture test for Anthropic
 prompt caching

Drives the real executeAnthropicProviderRequest down the streaming path with only
the client injected via the createClient seam (real models/utils/attachments run),
and asserts the request payload handed to messages.create carries a
cache_control-tagged system block for a large prompt and a plain string for a
small one. Closes the end-to-end wiring gap (AI-SDK-style request-body capture).
---
 apps/sim/providers/anthropic/core.test.ts | 66 +++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 apps/sim/providers/anthropic/core.test.ts

diff --git a/apps/sim/providers/anthropic/core.test.ts b/apps/sim/providers/anthropic/core.test.ts
new file mode 100644
index 0000000000..c4cb6afd9b
--- /dev/null
+++ b/apps/sim/providers/anthropic/core.test.ts
@@ -0,0 +1,66 @@
+/**
+ * @vitest-environment node
+ */
+import type Anthropic from '@anthropic-ai/sdk'
+import type { TextBlockParam } from '@anthropic-ai/sdk/resources'
+import { describe, expect, it, vi } from 'vitest'
+import { executeAnthropicProviderRequest } from '@/providers/anthropic/core'
+import type { ProviderRequest } from '@/providers/types'
+
+const LARGE = 'x'.repeat(8_000) // ~2,000 est. tokens, above the 1,024-token gate
+const SMALL = 'x'.repeat(400) // ~100 est. tokens, below the 1,024-token gate
+
+/**
+ * Drives the real `executeAnthropicProviderRequest` down the streaming/no-tools
+ * path and captures the request payload handed to `messages.create`, injecting
+ * only the client via the `createClient` seam (real models/utils/attachments run).
+ * The streaming path builds its stream lazily, so an empty async iterable suffices.
+ * @param request - Fields spread over the default request, overriding its defaults.
+ * @returns The payload passed to `messages.create`, or `{}` if it was never called.
+ */
+async function captureRequestPayload(
+  request: Partial<ProviderRequest>
+): Promise<Record<string, unknown>> {
+  let captured: Record<string, unknown> = {}
+  const fakeClient = {
+    messages: {
+      create: vi.fn(async (payload: Record<string, unknown>) => {
+        captured = payload
+        return (async function* () {})()
+      }),
+    },
+  } as unknown as Anthropic
+  await executeAnthropicProviderRequest(
+    {
+      model: 'claude-sonnet-4-6',
+      messages: [{ role: 'user', content: 'hi' }],
+      apiKey: 'test-key',
+      stream: true,
+      ...request,
+    } as ProviderRequest,
+    {
+      providerId: 'anthropic',
+      providerLabel: 'Anthropic',
+      logger: { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() } as never,
+      createClient: () => fakeClient,
+    }
+  )
+  return captured
+}
+
+describe('executeAnthropicProviderRequest prompt caching (request capture)', () => {
+  it('emits a cache_control-tagged system block for a large system prompt', async () => {
+    const payload = await captureRequestPayload({ systemPrompt: LARGE })
+    // Expect the system prompt as a block array whose first block is cache-tagged.
+    expect(Array.isArray(payload.system)).toBe(true)
+    const blocks = payload.system as TextBlockParam[]
+    expect(blocks[0]).toMatchObject({ type: 'text', cache_control: { type: 'ephemeral' } })
+  })
+
+  it('leaves a small system prompt as a plain string (no cache_control)', async () => {
+    const payload = await captureRequestPayload({ systemPrompt: SMALL })
+    // Below the gate the system prompt must reach the client unchanged.
+    expect(typeof payload.system).toBe('string')
+    expect(payload.system).toBe(SMALL)
+  })
+})