From 95439a06368672fd0a194ad05110318970c1fbe5 Mon Sep 17 00:00:00 2001 From: awschmeder Date: Tue, 23 Jun 2026 09:55:48 -0700 Subject: [PATCH 1/6] fix: streaming tool-call argument loss in NativeToolCallParser (#695) --- .../fix-toolcall-dropped-leading-deltas.md | 5 + prs/fix-toolcall-dropped-leading-deltas.md | 39 +++++ .../assistant-message/NativeToolCallParser.ts | 47 +++--- .../__tests__/NativeToolCallParser.spec.ts | 144 +++++++++++++++++- .../native-tools/ask_followup_question.ts | 4 +- 5 files changed, 216 insertions(+), 23 deletions(-) create mode 100644 .changeset/fix-toolcall-dropped-leading-deltas.md create mode 100644 prs/fix-toolcall-dropped-leading-deltas.md diff --git a/.changeset/fix-toolcall-dropped-leading-deltas.md b/.changeset/fix-toolcall-dropped-leading-deltas.md new file mode 100644 index 0000000000..d4bedb753d --- /dev/null +++ b/.changeset/fix-toolcall-dropped-leading-deltas.md @@ -0,0 +1,5 @@ +--- +"zoo-code": patch +--- + +Fix streaming tool-call arguments being dropped when argument deltas arrive before the tool-call id, which caused spurious "missing required parameter" errors (affects LiteLLM, OpenAI-compatible, and DeepSeek providers). diff --git a/prs/fix-toolcall-dropped-leading-deltas.md b/prs/fix-toolcall-dropped-leading-deltas.md new file mode 100644 index 0000000000..0c26db49a5 --- /dev/null +++ b/prs/fix-toolcall-dropped-leading-deltas.md @@ -0,0 +1,39 @@ +### Related GitHub Issue + +Closes: #695 + +### Description + +When a provider streams a tool call whose first delta(s) arrive *before* the tool-call `id` is known, those leading argument bytes are silently discarded by `NativeToolCallParser.processRawChunk`. This causes downstream "missing required parameter" errors even when the model supplied the data. + +This PR fixes the issue by centralizing the tracking of streaming tool calls in `NativeToolCallParser`. The `rawChunkTracker` is now initialized on the first sight of a stream `index`, independent of whether an `id` is present. All `arguments` deltas are buffered until both `id` and `name` are known, ensuring no data loss during streaming reassembly. + +### Test Procedure + +1. Ran the newly added unit test in `src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts` which verifies that leading argument bytes arriving before the `id` are correctly preserved and finalized. +2. Verified that existing provider tests in the same test file pass. + +### Pre-Submission Checklist + +- [x] **Issue Linked**: This PR is linked to an approved GitHub Issue. +- [x] **Scope**: My changes are focused on the linked issue (one major feature/fix per PR). +- [x] **Self-Review**: I have performed a thorough self-review of my code. +- [x] **Testing**: New and/or updated tests have been added to cover my changes. +- [x] **Documentation Impact**: I have considered if my changes require documentation updates. +- [x] **Contribution Guidelines**: I have read and agree to the [Contributor Guidelines](/CONTRIBUTING.md). + +### Screenshots / Videos + +N/A + +### Documentation Updates + +- [x] No documentation updates are required. + +### Additional Notes + +N/A + +### Get in Touch + +@awschmeder diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index 9639ae1baa..b5e2fb51a8 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -66,7 +66,7 @@ export class NativeToolCallParser { private static rawChunkTracker = new Map< number, { - id: string + id?: string name: string hasStarted: boolean deltaBuffer: string[] @@ -105,10 +105,11 @@ export class NativeToolCallParser { const events: ToolCallStreamEvent[] = [] const { index, id, name, arguments: args } = chunk + // Create the tracker on first sight of this index, independent of whether + // an id has arrived yet. Keying the lifecycle by index (not id) ensures any + // `arguments` that stream before the id is known are buffered rather than dropped. let tracked = this.rawChunkTracker.get(index) - - // Initialize new tool call tracking when we receive an id - if (id && !tracked) { + if (!tracked) { tracked = { id, name: name || "", @@ -118,38 +119,39 @@ export class NativeToolCallParser { this.rawChunkTracker.set(index, tracked) } - if (!tracked) { - return events + // Record id and name as they arrive (they may come in separate chunks). + if (id) { + tracked.id = id } - - // Update name if present in chunk and not yet set if (name) { tracked.name = name } - // Emit start event when we have the name - if (!tracked.hasStarted && tracked.name) { + // Emit start event only once both id and name are known. Using a local + // non-null id keeps emitted events typed as id: string. + if (!tracked.hasStarted && tracked.id && tracked.name) { + const startedId = tracked.id events.push({ type: "tool_call_start", - id: tracked.id, + id: startedId, name: tracked.name, }) tracked.hasStarted = true - // Flush buffered deltas + // Flush buffered deltas accumulated during the pre-start window. for (const bufferedDelta of tracked.deltaBuffer) { events.push({ type: "tool_call_delta", - id: tracked.id, + id: startedId, delta: bufferedDelta, }) } tracked.deltaBuffer = [] } - // Emit delta event for argument chunks + // Emit delta event for argument chunks, buffering until start is emitted. if (args) { - if (tracked.hasStarted) { + if (tracked.hasStarted && tracked.id) { events.push({ type: "tool_call_delta", id: tracked.id, @@ -172,10 +174,15 @@ export class NativeToolCallParser { if (finishReason === "tool_calls" && this.rawChunkTracker.size > 0) { for (const [, tracked] of this.rawChunkTracker.entries()) { - events.push({ - type: "tool_call_end", - id: tracked.id, - }) + // Only emit an end for trackers that actually started. A tracker that + // never received an id/name (malformed stream) must not emit a phantom + // end; since start requires an id, hasStarted implies tracked.id is set. + if (tracked.hasStarted && tracked.id) { + events.push({ + type: "tool_call_end", + id: tracked.id, + }) + } } } @@ -191,7 +198,7 @@ export class NativeToolCallParser { if (this.rawChunkTracker.size > 0) { for (const [, tracked] of this.rawChunkTracker.entries()) { - if (tracked.hasStarted) { + if (tracked.hasStarted && tracked.id) { events.push({ type: "tool_call_end", id: tracked.id, diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index 2c15e12069..7d963aa5ab 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -1,4 +1,4 @@ -import { NativeToolCallParser } from "../NativeToolCallParser" +import { NativeToolCallParser, type ToolCallStreamEvent } from "../NativeToolCallParser" describe("NativeToolCallParser", () => { beforeEach(() => { @@ -343,4 +343,146 @@ describe("NativeToolCallParser", () => { }) }) }) + + describe("processRawChunk streaming reassembly", () => { + // Mirror the sequencing Task.ts performs: feed each raw chunk through + // processRawChunk, drive startStreamingToolCall on tool_call_start, feed + // tool_call_delta into processStreamingChunk, and finalize at the end. + // Returns the ordered event types/ids plus the finalized tool uses by id. + const drive = ( + rawChunks: Array<{ index: number; id?: string; name?: string; arguments?: string }>, + finishReason: string | null = "tool_calls", + ) => { + const events: ToolCallStreamEvent[] = [] + + const handleEvent = (event: ToolCallStreamEvent) => { + events.push(event) + if (event.type === "tool_call_start") { + NativeToolCallParser.startStreamingToolCall(event.id, event.name) + } else if (event.type === "tool_call_delta") { + NativeToolCallParser.processStreamingChunk(event.id, event.delta) + } + } + + for (const chunk of rawChunks) { + for (const event of NativeToolCallParser.processRawChunk(chunk)) { + handleEvent(event) + } + } + + // Task.ts emits ends via processFinishReason on finish_reason: "tool_calls". + // Use clearRawChunkState (not finalizeRawChunks) for cleanup so we don't + // double-count the end events both paths would produce. + for (const event of NativeToolCallParser.processFinishReason(finishReason)) { + handleEvent(event) + } + NativeToolCallParser.clearRawChunkState() + + const finalized = new Map>() + const startIds = events.filter((e) => e.type === "tool_call_start").map((e) => e.id) + for (const id of startIds) { + finalized.set(id, NativeToolCallParser.finalizeStreamingToolCall(id)) + } + + return { events, finalized } + } + + it("preserves leading argument bytes that arrive before the id", () => { + // First chunk carries arguments but NO id; id+name arrive later, then more args. + const fullArgs = JSON.stringify({ path: "src/leading.ts", mode: "slice" }) + const firstHalf = fullArgs.slice(0, 10) + const secondHalf = fullArgs.slice(10) + + const { events, finalized } = drive([ + { index: 0, arguments: firstHalf }, + { index: 0, id: "call_late_id", name: "read_file" }, + { index: 0, arguments: secondHalf }, + ]) + + // Exactly one start, in the right order, with the late id. + const starts = events.filter((e) => e.type === "tool_call_start") + expect(starts).toHaveLength(1) + expect(starts[0].id).toBe("call_late_id") + + // The finalized arguments must contain the complete, uncorrupted payload. + const result = finalized.get("call_late_id") + expect(result).not.toBeNull() + expect(result?.type).toBe("tool_use") + if (result?.type === "tool_use") { + const nativeArgs = result.nativeArgs as { path: string; mode?: string } + expect(nativeArgs.path).toBe("src/leading.ts") + expect(nativeArgs.mode).toBe("slice") + } + }) + + it("handles id and name arriving in separate chunks (issue #218)", () => { + const fullArgs = JSON.stringify({ path: "src/split.ts" }) + + const { events, finalized } = drive([ + { index: 0, id: "call_split" }, + { index: 0, name: "read_file" }, + { index: 0, arguments: fullArgs }, + ]) + + const starts = events.filter((e) => e.type === "tool_call_start") + expect(starts).toHaveLength(1) + expect(starts[0].id).toBe("call_split") + + const result = finalized.get("call_split") + expect(result?.type).toBe("tool_use") + if (result?.type === "tool_use") { + const nativeArgs = result.nativeArgs as { path: string } + expect(nativeArgs.path).toBe("src/split.ts") + } + }) + + it("keeps two parallel tool calls on distinct indices isolated", () => { + const argsA = JSON.stringify({ path: "src/a.ts" }) + const argsB = JSON.stringify({ path: "src/b.ts" }) + + const { events, finalized } = drive([ + { index: 0, arguments: argsA.slice(0, 8) }, + { index: 1, arguments: argsB.slice(0, 8) }, + { index: 0, id: "call_a", name: "read_file" }, + { index: 1, id: "call_b", name: "read_file" }, + { index: 0, arguments: argsA.slice(8) }, + { index: 1, arguments: argsB.slice(8) }, + ]) + + const starts = events.filter((e) => e.type === "tool_call_start") + expect(starts).toHaveLength(2) + + const resultA = finalized.get("call_a") + const resultB = finalized.get("call_b") + if (resultA?.type === "tool_use") { + expect((resultA.nativeArgs as { path: string }).path).toBe("src/a.ts") + } + if (resultB?.type === "tool_use") { + expect((resultB.nativeArgs as { path: string }).path).toBe("src/b.ts") + } + }) + + it("emits the same event sequence for the single-chunk-with-id flow (regression guard)", () => { + const fullArgs = JSON.stringify({ path: "src/single.ts" }) + + const { events, finalized } = drive([ + { index: 0, id: "call_single", name: "read_file", arguments: fullArgs }, + ]) + + expect(events.map((e) => e.type)).toEqual(["tool_call_start", "tool_call_delta", "tool_call_end"]) + expect(events.every((e) => e.id === "call_single")).toBe(true) + + const result = finalized.get("call_single") + if (result?.type === "tool_use") { + expect((result.nativeArgs as { path: string }).path).toBe("src/single.ts") + } + }) + + it("does not emit a phantom tool_call_end for a tracker that never received an id", () => { + const { events } = drive([{ index: 0, arguments: '{"path":"orphan.ts"}' }]) + + expect(events.filter((e) => e.type === "tool_call_start")).toHaveLength(0) + expect(events.filter((e) => e.type === "tool_call_end")).toHaveLength(0) + }) + }) }) diff --git a/src/core/prompts/tools/native-tools/ask_followup_question.ts b/src/core/prompts/tools/native-tools/ask_followup_question.ts index b0591206ad..8f064eb20b 100644 --- a/src/core/prompts/tools/native-tools/ask_followup_question.ts +++ b/src/core/prompts/tools/native-tools/ask_followup_question.ts @@ -4,7 +4,7 @@ const ASK_FOLLOWUP_QUESTION_DESCRIPTION = `Ask the user a question to gather add Parameters: - question: (required) A clear, specific question addressing the information needed -- follow_up: (required) A list of 2-4 suggested answers. Suggestions must be complete, actionable answers without placeholders. Optionally include mode to switch modes (code/architect/etc.) +- follow_up: (required) An array of 1-4 suggested answers. Always provide this as an array, even when there is only one suggestion. Each suggestion must be a complete, actionable answer without placeholders. Suggestions optionally include mode to switch modes (code/architect/etc.) Example: Asking for file path { "question": "What is the path to the frontend-config.json file?", "follow_up": [{ "text": "./src/frontend-config.json", "mode": null }, { "text": "./config/frontend-config.json", "mode": null }, { "text": "./frontend-config.json", "mode": null }] } @@ -14,7 +14,7 @@ Example: Asking with mode switch const QUESTION_PARAMETER_DESCRIPTION = `Clear, specific question that captures the missing information you need` -const FOLLOW_UP_PARAMETER_DESCRIPTION = `Required list of 2-4 suggested responses; each suggestion must be a complete, actionable answer and may include a mode switch` +const FOLLOW_UP_PARAMETER_DESCRIPTION = `Required array of 1-4 suggested responses, always an array even for a single suggestion; each suggestion must be a complete, actionable answer and may include a mode switch` const FOLLOW_UP_TEXT_DESCRIPTION = `Suggested answer the user can pick` From d911e989c2f4bb57a49d03b214ebcc3cae77d30d Mon Sep 17 00:00:00 2001 From: awschmeder Date: Tue, 23 Jun 2026 11:35:03 -0700 Subject: [PATCH 2/6] chore: remove agent-generated changeset file per policy --- .changeset/fix-toolcall-dropped-leading-deltas.md | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .changeset/fix-toolcall-dropped-leading-deltas.md diff --git a/.changeset/fix-toolcall-dropped-leading-deltas.md b/.changeset/fix-toolcall-dropped-leading-deltas.md deleted file mode 100644 index d4bedb753d..0000000000 --- a/.changeset/fix-toolcall-dropped-leading-deltas.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"zoo-code": patch ---- - -Fix streaming tool-call arguments being dropped when argument deltas arrive before the tool-call id, which caused spurious "missing required parameter" errors (affects LiteLLM, OpenAI-compatible, and DeepSeek providers). From a01a8380540e0eff46e2f4cad52887ba84977f45 Mon Sep 17 00:00:00 2001 From: awschmeder Date: Tue, 23 Jun 2026 11:40:04 -0700 Subject: [PATCH 3/6] test: add direct coverage for finalizeRawChunks() in NativeToolCallParser --- .../__tests__/NativeToolCallParser.spec.ts | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index 7d963aa5ab..2c33b8ceae 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -484,5 +484,60 @@ describe("NativeToolCallParser", () => { expect(events.filter((e) => e.type === "tool_call_start")).toHaveLength(0) expect(events.filter((e) => e.type === "tool_call_end")).toHaveLength(0) }) + + it("finalizeRawChunks() emits end events and guards against missing id", () => { + // Simulate a started tool call: process chunks to populate state + const chunks = [ + { index: 0, id: "call_finalize", name: "read_file" }, + { index: 0, arguments: '{"path":"file.ts"' }, + { index: 0, arguments: ',"mode":"slice"}' }, + ] + + const events: Array<{ type: string; id?: string }> = [] + for (const chunk of chunks) { + for (const event of NativeToolCallParser.processRawChunk(chunk)) { + events.push(event) + if (event.type === "tool_call_start") { + NativeToolCallParser.startStreamingToolCall(event.id, event.name) + } else if (event.type === "tool_call_delta") { + NativeToolCallParser.processStreamingChunk(event.id, event.delta) + } + } + } + + // Now finalize the raw chunks to emit the end event + const finalizeEvents = NativeToolCallParser.finalizeRawChunks() + for (const event of finalizeEvents) { + events.push(event) + } + + // Verify the end event was produced by finalizeRawChunks + const ends = events.filter((e) => e.type === "tool_call_end") + expect(ends).toHaveLength(1) + expect(ends[0].id).toBe("call_finalize") + + // Finalize the tool call to ensure it contains the complete arguments + const result = NativeToolCallParser.finalizeStreamingToolCall("call_finalize") + expect(result?.type).toBe("tool_use") + if (result?.type === "tool_use") { + expect((result.nativeArgs as { path: string }).path).toBe("file.ts") + } + }) + + it("finalizeRawChunks() does not emit end for tracker without id", () => { + // Start a tracker with arguments but no id, then finalize + const chunks = [{ index: 0, arguments: '{"incomplete":true}' }] + + for (const chunk of chunks) { + NativeToolCallParser.processRawChunk(chunk) + } + + // Finalize should not emit an end event if id was never set + const finalizeEvents = NativeToolCallParser.finalizeRawChunks() + const ends = finalizeEvents.filter((e) => e.type === "tool_call_end") + expect(ends).toHaveLength(0) + + NativeToolCallParser.clearRawChunkState() + }) }) }) From fba6c71d5c464d7490e1bea2d8f08d2b54b55c17 Mon Sep 17 00:00:00 2001 From: awschmeder Date: Fri, 26 Jun 2026 12:32:31 -0700 Subject: [PATCH 4/6] fix: address PR #700 review feedback for tool-call streaming reassembly - Guard finalize results with not.toBeNull() in parallel-index and single-chunk tests so a null result fails instead of passing silently - Add reverse-ordering test (name -> buffered args -> id) covering the start-gate id requirement - Use name !== undefined recording plus a nameSeen flag in the start-gate as a defensive guard against an empty tool name - Clear rawChunkTracker in processFinishReason so finalizeRawChunks is a safe no-op; add a regression test asserting no double tool_call_end - Remove unrelated ask_followup_question wording change from PR scope - Remove prs/fix-toolcall-dropped-leading-deltas.md from the diff --- prs/fix-toolcall-dropped-leading-deltas.md | 39 ------------- .../assistant-message/NativeToolCallParser.ts | 13 ++++- .../__tests__/NativeToolCallParser.spec.ts | 56 +++++++++++++++++++ .../native-tools/ask_followup_question.ts | 4 +- 4 files changed, 69 insertions(+), 43 deletions(-) delete mode 100644 prs/fix-toolcall-dropped-leading-deltas.md diff --git a/prs/fix-toolcall-dropped-leading-deltas.md b/prs/fix-toolcall-dropped-leading-deltas.md deleted file mode 100644 index 0c26db49a5..0000000000 --- a/prs/fix-toolcall-dropped-leading-deltas.md +++ /dev/null @@ -1,39 +0,0 @@ -### Related GitHub Issue - -Closes: #695 - -### Description - -When a provider streams a tool call whose first delta(s) arrive *before* the tool-call `id` is known, those leading argument bytes are silently discarded by `NativeToolCallParser.processRawChunk`. This causes downstream "missing required parameter" errors even when the model supplied the data. - -This PR fixes the issue by centralizing the tracking of streaming tool calls in `NativeToolCallParser`. The `rawChunkTracker` is now initialized on the first sight of a stream `index`, independent of whether an `id` is present. All `arguments` deltas are buffered until both `id` and `name` are known, ensuring no data loss during streaming reassembly. - -### Test Procedure - -1. Ran the newly added unit test in `src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts` which verifies that leading argument bytes arriving before the `id` are correctly preserved and finalized. -2. Verified that existing provider tests in the same test file pass. - -### Pre-Submission Checklist - -- [x] **Issue Linked**: This PR is linked to an approved GitHub Issue. -- [x] **Scope**: My changes are focused on the linked issue (one major feature/fix per PR). -- [x] **Self-Review**: I have performed a thorough self-review of my code. -- [x] **Testing**: New and/or updated tests have been added to cover my changes. -- [x] **Documentation Impact**: I have considered if my changes require documentation updates. -- [x] **Contribution Guidelines**: I have read and agree to the [Contributor Guidelines](/CONTRIBUTING.md). - -### Screenshots / Videos - -N/A - -### Documentation Updates - -- [x] No documentation updates are required. - -### Additional Notes - -N/A - -### Get in Touch - -@awschmeder diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index b5e2fb51a8..a223d95edd 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -68,6 +68,10 @@ export class NativeToolCallParser { { id?: string name: string + // Tracks whether a name field has been observed at all, distinct from a + // truthy name. This is a defensive guard: should a provider ever send an + // empty name, the start-gate must not rely on name truthiness. + nameSeen: boolean hasStarted: boolean deltaBuffer: string[] } @@ -113,6 +117,7 @@ export class NativeToolCallParser { tracked = { id, name: name || "", + nameSeen: name !== undefined, hasStarted: false, deltaBuffer: [], } @@ -123,13 +128,14 @@ export class NativeToolCallParser { if (id) { tracked.id = id } - if (name) { + if (name !== undefined) { tracked.name = name + tracked.nameSeen = true } // Emit start event only once both id and name are known. Using a local // non-null id keeps emitted events typed as id: string. - if (!tracked.hasStarted && tracked.id && tracked.name) { + if (!tracked.hasStarted && tracked.id && tracked.nameSeen) { const startedId = tracked.id events.push({ type: "tool_call_start", @@ -184,6 +190,9 @@ export class NativeToolCallParser { }) } } + // Clear so a subsequent finalizeRawChunks() is a safe no-op and cannot + // double-fire end events for the same trackers. + this.rawChunkTracker.clear() } return events diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index 2c33b8ceae..a5119a2eac 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -436,6 +436,36 @@ describe("NativeToolCallParser", () => { } }) + it("handles name arriving before id with buffered args in between (reverse ordering)", () => { + const fullArgs = JSON.stringify({ path: "src/reverse.ts" }) + const firstHalf = fullArgs.slice(0, 9) + const secondHalf = fullArgs.slice(9) + + const { events, finalized } = drive([ + { index: 0, name: "read_file" }, + { index: 0, arguments: firstHalf }, + { index: 0, id: "call_reverse" }, + { index: 0, arguments: secondHalf }, + ]) + + // Start must not fire until the id arrives, so exactly one start with the late id. + const starts = events.filter((e) => e.type === "tool_call_start") + expect(starts).toHaveLength(1) + expect(starts[0].id).toBe("call_reverse") + + // The buffered delta must be flushed only after the start event. + const startIndex = events.findIndex((e) => e.type === "tool_call_start") + const firstDeltaIndex = events.findIndex((e) => e.type === "tool_call_delta") + expect(startIndex).toBeLessThan(firstDeltaIndex) + + const result = finalized.get("call_reverse") + expect(result).not.toBeNull() + expect(result?.type).toBe("tool_use") + if (result?.type === "tool_use") { + expect((result.nativeArgs as { path: string }).path).toBe("src/reverse.ts") + } + }) + it("keeps two parallel tool calls on distinct indices isolated", () => { const argsA = JSON.stringify({ path: "src/a.ts" }) const argsB = JSON.stringify({ path: "src/b.ts" }) @@ -454,6 +484,8 @@ describe("NativeToolCallParser", () => { const resultA = finalized.get("call_a") const resultB = finalized.get("call_b") + expect(resultA).not.toBeNull() + expect(resultB).not.toBeNull() if (resultA?.type === "tool_use") { expect((resultA.nativeArgs as { path: string }).path).toBe("src/a.ts") } @@ -473,6 +505,8 @@ describe("NativeToolCallParser", () => { expect(events.every((e) => e.id === "call_single")).toBe(true) const result = finalized.get("call_single") + expect(result).not.toBeNull() + expect(result?.type).toBe("tool_use") if (result?.type === "tool_use") { expect((result.nativeArgs as { path: string }).path).toBe("src/single.ts") } @@ -539,5 +573,27 @@ describe("NativeToolCallParser", () => { NativeToolCallParser.clearRawChunkState() }) + + it("does not double-fire end events across processFinishReason and finalizeRawChunks", () => { + // Drive a started tool call through the raw chunk path. + const chunks = [ + { index: 0, id: "call_dup", name: "read_file" }, + { index: 0, arguments: '{"path":"file.ts"}' }, + ] + for (const chunk of chunks) { + NativeToolCallParser.processRawChunk(chunk) + } + + // Task.ts emits ends via processFinishReason, then calls finalizeRawChunks + // unconditionally. Both must not emit an end for the same tracker. + const finishEvents = NativeToolCallParser.processFinishReason("tool_calls") + const finalizeEvents = NativeToolCallParser.finalizeRawChunks() + + const allEnds = [...finishEvents, ...finalizeEvents].filter((e) => e.type === "tool_call_end") + expect(allEnds).toHaveLength(1) + expect(allEnds[0].id).toBe("call_dup") + + NativeToolCallParser.clearRawChunkState() + }) }) }) diff --git a/src/core/prompts/tools/native-tools/ask_followup_question.ts b/src/core/prompts/tools/native-tools/ask_followup_question.ts index 8f064eb20b..b0591206ad 100644 --- a/src/core/prompts/tools/native-tools/ask_followup_question.ts +++ b/src/core/prompts/tools/native-tools/ask_followup_question.ts @@ -4,7 +4,7 @@ const ASK_FOLLOWUP_QUESTION_DESCRIPTION = `Ask the user a question to gather add Parameters: - question: (required) A clear, specific question addressing the information needed -- follow_up: (required) An array of 1-4 suggested answers. Always provide this as an array, even when there is only one suggestion. Each suggestion must be a complete, actionable answer without placeholders. Suggestions optionally include mode to switch modes (code/architect/etc.) +- follow_up: (required) A list of 2-4 suggested answers. Suggestions must be complete, actionable answers without placeholders. Optionally include mode to switch modes (code/architect/etc.) Example: Asking for file path { "question": "What is the path to the frontend-config.json file?", "follow_up": [{ "text": "./src/frontend-config.json", "mode": null }, { "text": "./config/frontend-config.json", "mode": null }, { "text": "./frontend-config.json", "mode": null }] } @@ -14,7 +14,7 @@ Example: Asking with mode switch const QUESTION_PARAMETER_DESCRIPTION = `Clear, specific question that captures the missing information you need` -const FOLLOW_UP_PARAMETER_DESCRIPTION = `Required array of 1-4 suggested responses, always an array even for a single suggestion; each suggestion must be a complete, actionable answer and may include a mode switch` +const FOLLOW_UP_PARAMETER_DESCRIPTION = `Required list of 2-4 suggested responses; each suggestion must be a complete, actionable answer and may include a mode switch` const FOLLOW_UP_TEXT_DESCRIPTION = `Suggested answer the user can pick` From f2396b92b2bcefb3cc6a7a832d738c69eb69cf7f Mon Sep 17 00:00:00 2001 From: awschmeder Date: Fri, 26 Jun 2026 13:20:12 -0700 Subject: [PATCH 5/6] fix: handle provider-emitted tool_call_end chunks during streaming Task.ts had no stream-level case for tool_call_end, so end chunks emitted by providers on finish_reason: "tool_calls" were silently dropped; tool calls only finalized at stream end via finalizeRawChunks(). Add a tool_call_end case so tools finalize and present during streaming, and extract the triplicated finalize/present logic into a shared idempotent helper. Correct the NativeToolCallParser test drive helper to finalize via finalizeRawChunks() (matching production) instead of processFinishReason(). --- .../__tests__/NativeToolCallParser.spec.ts | 16 +-- src/core/task/Task.ts | 134 +++++++----------- 2 files changed, 54 insertions(+), 96 deletions(-) diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index a5119a2eac..47dd616420 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -347,12 +347,10 @@ describe("NativeToolCallParser", () => { describe("processRawChunk streaming reassembly", () => { // Mirror the sequencing Task.ts performs: feed each raw chunk through // processRawChunk, drive startStreamingToolCall on tool_call_start, feed - // tool_call_delta into processStreamingChunk, and finalize at the end. + // tool_call_delta into processStreamingChunk, and emit ends at stream close + // via finalizeRawChunks() (the same call Task.ts makes after the stream ends). // Returns the ordered event types/ids plus the finalized tool uses by id. - const drive = ( - rawChunks: Array<{ index: number; id?: string; name?: string; arguments?: string }>, - finishReason: string | null = "tool_calls", - ) => { + const drive = (rawChunks: Array<{ index: number; id?: string; name?: string; arguments?: string }>) => { const events: ToolCallStreamEvent[] = [] const handleEvent = (event: ToolCallStreamEvent) => { @@ -370,13 +368,11 @@ describe("NativeToolCallParser", () => { } } - // Task.ts emits ends via processFinishReason on finish_reason: "tool_calls". - // Use clearRawChunkState (not finalizeRawChunks) for cleanup so we don't - // double-count the end events both paths would produce. - for (const event of NativeToolCallParser.processFinishReason(finishReason)) { + // Task.ts finalizes any tool calls still open at stream end via + // finalizeRawChunks(), which emits the tool_call_end events. + for (const event of NativeToolCallParser.finalizeRawChunks()) { handleEvent(event) } - NativeToolCallParser.clearRawChunkState() const finalized = new Map>() const startIds = events.filter((e) => e.type === "tool_call_start").map((e) => e.id) diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 50d4674fd0..447d94acbe 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -389,6 +389,43 @@ export class Task extends EventEmitter implements TaskLike { // Native tool call streaming state (track which index each tool is at) private streamingToolCallIndices: Map = new Map() + /** + * Finalize a streaming native tool call by id and present it. + * + * Shared by every site that observes a tool_call_end: the per-chunk event + * loop, the stream-level tool_call_end case, and the end-of-stream + * finalizeRawChunks() pass. Calling it again for an already-finalized id is a + * safe no-op because finalizeStreamingToolCall() and the index map entry are + * both cleared on first finalize. + */ + private finalizeStreamingToolCallById(id: string): void { + const finalToolUse = NativeToolCallParser.finalizeStreamingToolCall(id) + const toolUseIndex = this.streamingToolCallIndices.get(id) + + if (finalToolUse) { + ;(finalToolUse as any).id = id + if (toolUseIndex !== undefined) { + this.assistantMessageContent[toolUseIndex] = finalToolUse + } + this.streamingToolCallIndices.delete(id) + this.userMessageContentReady = false + presentAssistantMessage(this) + } else if (toolUseIndex !== undefined) { + // finalizeStreamingToolCall returned null (malformed JSON or missing args). + // Mark the tool as non-partial so it is presented as complete; execution + // will be short-circuited in presentAssistantMessage with a structured + // tool_result and validation will surface any missing required params. + const existingToolUse = this.assistantMessageContent[toolUseIndex] + if (existingToolUse && existingToolUse.type === "tool_use") { + existingToolUse.partial = false + ;(existingToolUse as any).id = id + } + this.streamingToolCallIndices.delete(id) + this.userMessageContentReady = false + presentAssistantMessage(this) + } + } + // Cached model info for current streaming session (set at start of each API request) // This prevents excessive getModel() calls during tool execution cachedStreamingModel?: { id: string; info: ModelInfo } @@ -2817,54 +2854,21 @@ export class Task extends EventEmitter implements TaskLike { } } } else if (event.type === "tool_call_end") { - // Finalize the streaming tool call - const finalToolUse = NativeToolCallParser.finalizeStreamingToolCall(event.id) - - // Get the index for this tool call - const toolUseIndex = this.streamingToolCallIndices.get(event.id) - - if (finalToolUse) { - // Store the tool call ID - ;(finalToolUse as any).id = event.id - - // Get the index and replace partial with final - if (toolUseIndex !== undefined) { - this.assistantMessageContent[toolUseIndex] = finalToolUse - } - - // Clean up tracking - this.streamingToolCallIndices.delete(event.id) - - // Mark that we have new content to process - this.userMessageContentReady = false - - // Present the finalized tool call - presentAssistantMessage(this) - } else if (toolUseIndex !== undefined) { - // finalizeStreamingToolCall returned null (malformed JSON or missing args) - // Mark the tool as non-partial so it's presented as complete, but execution - // will be short-circuited in presentAssistantMessage with a structured tool_result. - const existingToolUse = this.assistantMessageContent[toolUseIndex] - if (existingToolUse && existingToolUse.type === "tool_use") { - existingToolUse.partial = false - // Ensure it has the ID for native protocol - ;(existingToolUse as any).id = event.id - } - - // Clean up tracking - this.streamingToolCallIndices.delete(event.id) - - // Mark that we have new content to process - this.userMessageContentReady = false - - // Present the tool call - validation will handle missing params - presentAssistantMessage(this) - } + this.finalizeStreamingToolCallById(event.id) } } break } + case "tool_call_end": { + // Providers emit a tool_call_end chunk when finish_reason is + // "tool_calls" (either directly or via processFinishReason). + // Finalize the streaming tool call now so it is presented during + // streaming rather than waiting for finalizeRawChunks() at stream end. + this.finalizeStreamingToolCallById(chunk.id) + break + } + case "tool_call": { // Legacy: Handle complete tool calls (for backward compatibility) // Convert native tool call to ToolUse format @@ -3215,49 +3219,7 @@ export class Task extends EventEmitter implements TaskLike { const finalizeEvents = NativeToolCallParser.finalizeRawChunks() for (const event of finalizeEvents) { if (event.type === "tool_call_end") { - // Finalize the streaming tool call - const finalToolUse = NativeToolCallParser.finalizeStreamingToolCall(event.id) - - // Get the index for this tool call - const toolUseIndex = this.streamingToolCallIndices.get(event.id) - - if (finalToolUse) { - // Store the tool call ID - ;(finalToolUse as any).id = event.id - - // Get the index and replace partial with final - if (toolUseIndex !== undefined) { - this.assistantMessageContent[toolUseIndex] = finalToolUse - } - - // Clean up tracking - this.streamingToolCallIndices.delete(event.id) - - // Mark that we have new content to process - this.userMessageContentReady = false - - // Present the finalized tool call - presentAssistantMessage(this) - } else if (toolUseIndex !== undefined) { - // finalizeStreamingToolCall returned null (malformed JSON or missing args) - // We still need to mark the tool as non-partial so it gets executed - // The tool's validation will catch any missing required parameters - const existingToolUse = this.assistantMessageContent[toolUseIndex] - if (existingToolUse && existingToolUse.type === "tool_use") { - existingToolUse.partial = false - // Ensure it has the ID for native protocol - ;(existingToolUse as any).id = event.id - } - - // Clean up tracking - this.streamingToolCallIndices.delete(event.id) - - // Mark that we have new content to process - this.userMessageContentReady = false - - // Present the tool call - validation will handle missing params - presentAssistantMessage(this) - } + this.finalizeStreamingToolCallById(event.id) } } From 8252abc3bdd53b22cdb52b8a3c6a5147f0021a92 Mon Sep 17 00:00:00 2001 From: awschmeder Date: Fri, 26 Jun 2026 18:08:30 -0700 Subject: [PATCH 6/6] test: cover Task.finalizeStreamingToolCallById streaming finalization Add a focused spec that invokes the real Task.prototype.finalizeStreamingToolCallById via .call() with mocked presentAssistantMessage and NativeToolCallParser, covering the success, null-finalize (malformed JSON), untracked-id no-op, and idempotent re-finalize paths. Closes the codecov/patch gap on the new helper. --- .../finalizeStreamingToolCallById.spec.ts | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 src/core/task/__tests__/finalizeStreamingToolCallById.spec.ts diff --git a/src/core/task/__tests__/finalizeStreamingToolCallById.spec.ts b/src/core/task/__tests__/finalizeStreamingToolCallById.spec.ts new file mode 100644 index 0000000000..da1872ab15 --- /dev/null +++ b/src/core/task/__tests__/finalizeStreamingToolCallById.spec.ts @@ -0,0 +1,118 @@ +// npx vitest run core/task/__tests__/finalizeStreamingToolCallById.spec.ts + +import { Task } from "../Task" +import { presentAssistantMessage } from "../../assistant-message" +import { NativeToolCallParser } from "../../assistant-message/NativeToolCallParser" +import type { ToolUse } from "../../../shared/tools" + +// presentAssistantMessage is invoked by finalizeStreamingToolCallById to flush the +// finalized tool use; mocking it isolates the helper from the full presentation pipeline. +vi.mock("../../assistant-message", async (importOriginal) => { + const actual = (await importOriginal()) as Record + return { + ...actual, + presentAssistantMessage: vi.fn(), + } +}) + +const mockedPresent = vi.mocked(presentAssistantMessage) + +/** + * Invoke the private finalizeStreamingToolCallById against a minimal `this` stub. + * + * Instantiating a full Task requires a provider, context, and async setup that are + * irrelevant to this helper. The method only touches assistantMessageContent, + * streamingToolCallIndices, and userMessageContentReady, so a stub carrying those + * fields exercises the real source lines without the constructor. + */ +function callFinalize( + stub: { + assistantMessageContent: any[] + streamingToolCallIndices: Map + userMessageContentReady: boolean + }, + id: string, +): void { + ;(Task.prototype as any).finalizeStreamingToolCallById.call(stub, id) +} + +describe("Task.finalizeStreamingToolCallById", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it("replaces the partial block with the finalized tool use and presents it", () => { + const finalToolUse = { type: "tool_use", name: "read_file", partial: false } as unknown as ToolUse + const finalizeSpy = vi + .spyOn(NativeToolCallParser, "finalizeStreamingToolCall") + .mockReturnValue(finalToolUse) + + const stub = { + assistantMessageContent: [{ type: "tool_use", id: "call_abc", name: "read_file", partial: true }], + streamingToolCallIndices: new Map([["call_abc", 0]]), + userMessageContentReady: true, + } + + callFinalize(stub, "call_abc") + + expect(finalizeSpy).toHaveBeenCalledWith("call_abc") + expect(stub.assistantMessageContent[0]).toBe(finalToolUse) + expect((stub.assistantMessageContent[0] as any).id).toBe("call_abc") + expect(stub.streamingToolCallIndices.has("call_abc")).toBe(false) + expect(stub.userMessageContentReady).toBe(false) + expect(mockedPresent).toHaveBeenCalledTimes(1) + }) + + it("marks the existing block non-partial when finalize returns null (malformed JSON)", () => { + vi.spyOn(NativeToolCallParser, "finalizeStreamingToolCall").mockReturnValue(null) + + const existingBlock = { type: "tool_use", id: "call_bad", name: "write_to_file", partial: true } + const stub = { + assistantMessageContent: [existingBlock], + streamingToolCallIndices: new Map([["call_bad", 0]]), + userMessageContentReady: true, + } + + callFinalize(stub, "call_bad") + + expect(existingBlock.partial).toBe(false) + expect((existingBlock as any).id).toBe("call_bad") + expect(stub.streamingToolCallIndices.has("call_bad")).toBe(false) + expect(stub.userMessageContentReady).toBe(false) + expect(mockedPresent).toHaveBeenCalledTimes(1) + }) + + it("is a no-op when the id is not tracked", () => { + vi.spyOn(NativeToolCallParser, "finalizeStreamingToolCall").mockReturnValue(null) + + const stub = { + assistantMessageContent: [] as any[], + streamingToolCallIndices: new Map(), + userMessageContentReady: true, + } + + callFinalize(stub, "call_unknown") + + expect(stub.assistantMessageContent).toHaveLength(0) + expect(stub.userMessageContentReady).toBe(true) + expect(mockedPresent).not.toHaveBeenCalled() + }) + + it("is idempotent: a second call for the same id does nothing", () => { + const finalToolUse = { type: "tool_use", name: "read_file", partial: false } as unknown as ToolUse + vi.spyOn(NativeToolCallParser, "finalizeStreamingToolCall") + .mockReturnValueOnce(finalToolUse) + .mockReturnValue(null) + + const stub = { + assistantMessageContent: [{ type: "tool_use", id: "call_once", name: "read_file", partial: true }], + streamingToolCallIndices: new Map([["call_once", 0]]), + userMessageContentReady: true, + } + + callFinalize(stub, "call_once") + callFinalize(stub, "call_once") // id no longer tracked -> no-op + + expect(mockedPresent).toHaveBeenCalledTimes(1) + }) +})