Zoo-Code-Org · HappyLiang12 · Jun 15, 2026 · Jun 19, 2026 · Jun 20, 2026 · Jun 21, 2026
@@ -34,6 +34,17 @@ jobs:
             - name: Install xvfb
               if: github.event_name != 'pull_request' || steps.e2e-marker.outputs.cache-hit != 'true'
               run: sudo apt-get install -y xvfb
+
+            - name: Cache VS Code test binary
+              if: github.event_name != 'pull_request' || steps.e2e-marker.outputs.cache-hit != 'true'
+              uses: actions/cache@v4
+              with:
+                  path: |
+                      apps/vscode-e2e/.vscode-test/
+                  key: vscode-test-${{ runner.os }}-v1
+                  restore-keys: |
+                      vscode-test-${{ runner.os }}-
+
             - name: Run mocked E2E tests
               id: run-e2e
               # merge_group and workflow_dispatch always run; cache skip is pull_request only

@@ -35,6 +35,7 @@ function installZAiFetchInterceptor(
 	passthrough?: boolean,
 ): () => void {
 	const original = globalThis.fetch
+	const capturedRequests: Array<{ maxTokens?: number }> = []
 
 	globalThis.fetch = async function (input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
 		const url = typeof input === "string" ? input : input instanceof URL ? input.href : (input as Request).url
@@ -47,6 +48,8 @@ function installZAiFetchInterceptor(
 					})
 				: {}
 
+			capturedRequests.push({ maxTokens: body.max_tokens })
+
 			if (capture) {
 				capture.maxTokens = body.max_tokens
 			}
@@ -73,6 +76,10 @@ function installZAiFetchInterceptor(
 
 	return () => {
 		globalThis.fetch = original
+		const lastCaptured = capturedRequests[capturedRequests.length - 1]
+		if (capture && lastCaptured) {
+			capture.maxTokens = lastCaptured.maxTokens
+		}
 	}
 }
 
@@ -219,6 +226,8 @@ suite("Z.ai GLM provider", function () {
 		})
 
 		await waitUntilCompleted({ api, taskId })
+		// Best-effort 100 ms grace period so straggling async requests can land before snapshotting max_tokens
+		await new Promise((resolve) => setTimeout(resolve, 100))
 		const capturedMaxTokens = requestCapture.maxTokens
 
 		const completionMessage = messages.find(
@@ -229,8 +238,6 @@ suite("Z.ai GLM provider", function () {
 
 		// Verify max_tokens uses the restored default clamp (20% of context window)
 		// unless the user explicitly overrides it via modelMaxTokens.
-		// Snapshot immediately after waitUntilCompleted to avoid straggling async calls
-		// from this task overwriting requestCapture before the assertion runs.
 		assert.strictEqual(
 			capturedMaxTokens,
 			40_000,
@@ -263,6 +270,8 @@ suite("Z.ai GLM provider", function () {
 		})
 
 		await waitUntilCompleted({ api, taskId })
+		// Best-effort 100 ms grace period so straggling async requests can land before snapshotting max_tokens
+		await new Promise((resolve) => setTimeout(resolve, 100))
 		const capturedMaxTokens = requestCapture.maxTokens
 
 		const completionMessage = messages.find(
@@ -273,11 +282,10 @@ suite("Z.ai GLM provider", function () {
 
 		// Verify max_tokens uses the restored default clamp (20% of context window)
 		// unless the user explicitly overrides it via modelMaxTokens.
-		// Snapshot immediately after waitUntilCompleted to avoid straggling async calls
-		// from the prior test overwriting requestCapture before this assertion runs.
+		const expectedMaxTokens = 40_551 // Math.ceil(202_752 * 0.2) for glm-5-turbo
 		assert.strictEqual(
 			capturedMaxTokens,
-			40_551,
+			expectedMaxTokens,
 			`max_tokens should default to the glm-5-turbo clamp (40_551) but was ${capturedMaxTokens}`,
 		)
 	})

@@ -266,7 +266,7 @@ describe("VertexHandler", () => {
 					tools: expect.any(Array),
 					tool_choice: expect.any(Object),
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 		})
 
@@ -481,7 +481,7 @@ describe("VertexHandler", () => {
 						}),
 					],
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 		})
 
@@ -834,18 +834,21 @@ describe("VertexHandler", () => {
 
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("Test response")
-			expect(handler["client"].messages.create).toHaveBeenCalledWith({
-				model: "claude-3-5-sonnet-v2@20241022",
-				max_tokens: 8192,
-				temperature: 0,
-				messages: [
-					{
-						role: "user",
-						content: [{ type: "text", text: "Test prompt", cache_control: { type: "ephemeral" } }],
-					},
-				],
-				stream: false,
-			})
+			expect(handler["client"].messages.create).toHaveBeenCalledWith(
+				{
+					model: "claude-3-5-sonnet-v2@20241022",
+					max_tokens: 8192,
+					temperature: 0,
+					messages: [
+						{
+							role: "user",
+							content: [{ type: "text", text: "Test prompt", cache_control: { type: "ephemeral" } }],
+						},
+					],
+					stream: false,
+				},
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
+			)
 		})
 
 		it("should handle API errors for Claude", async () => {
@@ -864,6 +867,19 @@ describe("VertexHandler", () => {
 			)
 		})
 
+		it("should rethrow non-Error values without wrapping", async () => {
+			handler = new AnthropicVertexHandler({
+				apiModelId: "claude-3-5-sonnet-v2@20241022",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+			})
+
+			const mockCreate = vitest.fn().mockRejectedValue("raw error")
+			;(handler["client"].messages as any).create = mockCreate
+
+			await expect(handler.completePrompt("Test prompt")).rejects.toBe("raw error")
+		})
+
 		it("should handle non-text content for Claude", async () => {
 			handler = new AnthropicVertexHandler({
 				apiModelId: "claude-3-5-sonnet-v2@20241022",
@@ -1156,7 +1172,10 @@ describe("VertexHandler", () => {
 			}
 
 			// Verify the API was called without the beta header
-			expect(mockCreate).toHaveBeenCalledWith(expect.anything(), undefined)
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.anything(),
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
+			)
 		})
 	})
 
@@ -1246,7 +1265,7 @@ describe("VertexHandler", () => {
 					thinking: { type: "enabled", budget_tokens: 4096 },
 					temperature: 1.0, // Thinking requires temperature 1.0
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 		})
 
@@ -1273,7 +1292,7 @@ describe("VertexHandler", () => {
 				expect.objectContaining({
 					thinking: { type: "adaptive" },
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 
 			const request = mockCreate.mock.calls[0][0]
@@ -1302,7 +1321,7 @@ describe("VertexHandler", () => {
 				expect.objectContaining({
 					thinking: { type: "adaptive" },
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 
 			const request = mockCreate.mock.calls[0][0]
@@ -1393,7 +1412,7 @@ describe("VertexHandler", () => {
 					]),
 					tool_choice: { type: "auto", disable_parallel_tool_use: false },
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 		})
 
@@ -1446,7 +1465,7 @@ describe("VertexHandler", () => {
 						}),
 					]),
 				}),
-				undefined,
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
 			)
 		})
 

@@ -428,20 +428,77 @@ describe("AnthropicHandler", () => {
 			expect(requestBody?.max_tokens).toBe(32768)
 			expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
 		})
+
+		it("should pass signal for models outside the prompt-caching list", async () => {
+			// Mock getModel to return an ID not in the createMessage outer switch,
+			// so it hits the default (non-caching) path which passes { signal }.
+			const modelInfo = handler.getModel()
+			vitest.spyOn(handler, "getModel").mockReturnValue({
+				...modelInfo,
+				id: "non-cached-model",
+			} as any)
+
+			const stream = handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			// Verify the most recent messages.create call received an AbortSignal and no request headers
+			const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1]
+			expect(requestOptions).toEqual(expect.objectContaining({ signal: expect.any(AbortSignal) }))
+			expect(requestOptions?.headers).toBeUndefined()
+		})
+
+		it("should pass signal when inner prompt-caching switch hits default", async () => {
+			// Mock getModel to return the coverage-inner-default model, which
+			// is in the outer prompt-caching switch but NOT in the inner
+			// prompt-caching beta switch, so the inner default returns { signal }.
+			const modelInfo = handler.getModel()
+			vitest.spyOn(handler, "getModel").mockReturnValue({
+				...modelInfo,
+				id: "coverage-inner-default",
+			} as any)
+
+			const stream = handler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Consume stream
+			}
+
+			// Verify the most recent messages.create call received an AbortSignal and no
+			// request headers, i.e. no prompt-caching beta header (the inner switch default path).
+			const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1]
+			expect(requestOptions).toEqual(expect.objectContaining({ signal: expect.any(AbortSignal) }))
+			expect(requestOptions?.headers).toBeUndefined()
+		})
 	})
 
 	describe("completePrompt", () => {
 		it("should complete prompt successfully", async () => {
 			const result = await handler.completePrompt("Test prompt")
 			expect(result).toBe("Test response")
-			expect(mockCreate).toHaveBeenCalledWith({
-				model: mockOptions.apiModelId,
-				messages: [{ role: "user", content: "Test prompt" }],
-				max_tokens: 8192,
-				temperature: 0,
-				thinking: undefined,
-				stream: false,
-			})
+			expect(mockCreate).toHaveBeenCalledWith(
+				{
+					model: mockOptions.apiModelId,
+					messages: [{ role: "user", content: "Test prompt" }],
+					max_tokens: 8192,
+					temperature: 0,
+					thinking: undefined,
+					stream: false,
+				},
+				expect.objectContaining({ signal: expect.any(AbortSignal) }),
+			)
 		})
 
 		it("should handle API errors", async () => {
@@ -1057,4 +1114,96 @@ describe("AnthropicHandler", () => {
 			})
 		})
 	})
+
+	describe("content_block_start and content_block_delta coverage", () => {
+		it("should handle thinking content_block_start", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 1, output_tokens: 1 } } }
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: { type: "thinking", thinking: "let me think" },
+					}
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
+			const chunks: any[] = []
+			for await (const c of stream) {
+				chunks.push(c)
+			}
+			const reasoning = chunks.filter((c) => c.type === "reasoning")
+			expect(reasoning.some((c) => c.text === "let me think")).toBe(true)
+		})
+
+		it("should insert newline for second text content_block_start", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 1, output_tokens: 1 } } }
+					yield {
+						type: "content_block_start",
+						index: 0,
+						content_block: { type: "text", text: "first" },
+					}
+					yield {
+						type: "content_block_start",
+						index: 1,
+						content_block: { type: "text", text: "" },
+					}
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
+			const chunks: any[] = []
+			for await (const c of stream) {
+				chunks.push(c)
+			}
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks.some((c) => c.text === "\n")).toBe(true)
+		})
+
+		it("should handle thinking_delta", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 1, output_tokens: 1 } } }
+					yield {
+						type: "content_block_delta",
+						delta: { type: "thinking_delta", thinking: "hmm" },
+					}
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
+			const chunks: any[] = []
+			for await (const c of stream) {
+				chunks.push(c)
+			}
+			const reasoning = chunks.filter((c) => c.type === "reasoning")
+			expect(reasoning.some((c) => c.text === "hmm")).toBe(true)
+		})
+
+		it("should yield usage from message_delta events", async () => {
+			mockCreate.mockImplementationOnce(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 1, output_tokens: 1 } } }
+					yield { type: "content_block_start", index: 0, content_block: { type: "text", text: "hi" } }
+					yield { type: "message_delta", usage: { output_tokens: 5 } }
+					yield { type: "message_stop" }
+				},
+			}))
+
+			const stream = handler.createMessage("sys", [{ role: "user", content: "hi" }])
+			const chunks: any[] = []
+			for await (const c of stream) {
+				chunks.push(c)
+			}
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks.length).toBeGreaterThanOrEqual(2)
+			expect(usageChunks[1]).toEqual({ type: "usage", inputTokens: 0, outputTokens: 5 })
+		})
+	})
 })