diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts
new file mode 100644
index 000000000..7a2eabddf
--- /dev/null
+++ b/packages/types/src/__tests__/vscode-llm.spec.ts
@@ -0,0 +1,36 @@
+import { describe, it, expect } from "vitest"
+import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js"
+
+describe("vscodeLlmModels", () => {
+	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
+		// The VS Code LM API exposes only maxInputTokens; that is the value the UI reads from this
+		// table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE:
+		// maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records
+		// the larger advertised window. The UI reads maxInputTokens, so the divergence is a deliberate
+		// tripwire — assert the actual on-disk literals rather than forcing equality.
+		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
+		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
+		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
+	})
+	it("preserves the real window for models captured with a smaller maxInputTokens", () => {
+		expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078)
+		expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078)
+		expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594)
+		expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594)
+	})
+	it("keeps both window fields populated and positive for every row", () => {
+		for (const [family, model] of Object.entries(vscodeLlmModels)) {
+			expect(model.contextWindow, `${family}: contextWindow must be a positive integer`).toBeGreaterThan(0)
+			expect(model.maxInputTokens, `${family}: maxInputTokens must be a positive integer`).toBeGreaterThan(0)
+		}
+	})
+	it("excludes fabricated/internal/alias families and the dropped legacy rows", () => {
+		expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet")
+	})
+	it("defaults to a model id that exists in the table", () => {
+		expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5")
+		expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId)
+	})
+})
diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts
index efe069191..46df75fac 100644
--- a/packages/types/src/providers/vscode-llm.ts
+++ b/packages/types/src/providers/vscode-llm.ts
@@ -2,189 +2,222 @@ import type { ModelInfo } from "../model.js"
 
 export type VscodeLlmModelId = keyof typeof vscodeLlmModels
 
-export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
+export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"
 
-// https://docs.cline.bot/provider-config/vscode-language-model-api
+// Curated VS Code LM (GitHub Copilot) model catalog.
+//
+// The VS Code LM API only exposes `maxInputTokens` per model; it does NOT report a separate
+// total context window. For each row, `contextWindow` records the model's advertised window
+// while `maxInputTokens` is the enforced input ceiling the UI actually reads (via
+// useSelectedModel.ts) and the condense gate measures against. For most rows the two values
+// match. They intentionally DIVERGE only where the provider advertises a larger window than the
+// usable input ceiling (e.g. claude-opus-4.8): keeping both fields lets the context bar and the
+// auto-condense gate stay on a single source of truth (maxInputTokens) without losing the real
+// advertised window.
 export const vscodeLlmModels = {
-	"gpt-3.5-turbo": {
-		contextWindow: 12114,
-		supportsImages: false,
+	"claude-opus-4.8": {
+		contextWindow: 679560,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-3.5-turbo",
-		version: "gpt-3.5-turbo-0613",
-		name: "GPT 3.5 Turbo",
+		family: "claude-opus-4.8",
+		version: "claude-opus-4.8",
+		name: "Claude Opus 4.8",
 		supportsToolCalling: true,
-		maxInputTokens: 12114,
+		maxInputTokens: 197897,
 	},
-	"gpt-4o-mini": {
-		contextWindow: 12115,
-		supportsImages: false,
+	"claude-opus-4.7": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o-mini",
-		version: "gpt-4o-mini-2024-07-18",
-		name: "GPT-4o mini",
+		family: "claude-opus-4.7",
+		version: "claude-opus-4.7",
+		name: "Claude Opus 4.7",
 		supportsToolCalling: true,
-		maxInputTokens: 12115,
+		maxInputTokens: 197897,
 	},
-	"gpt-4": {
-		contextWindow: 28501,
-		supportsImages: false,
+	"claude-opus-4.6": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4",
-		version: "gpt-4-0613",
-		name: "GPT 4",
+		family: "claude-opus-4.6",
+		version: "claude-opus-4.6",
+		name: "Claude Opus 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 28501,
+		maxInputTokens: 197897,
 	},
-	"gpt-4-0125-preview": {
-		contextWindow: 63826,
-		supportsImages: false,
+	"claude-opus-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4-turbo",
-		version: "gpt-4-0125-preview",
-		name: "GPT 4 Turbo",
+		family: "claude-opus-4.5",
+		version: "claude-opus-4.5",
+		name: "Claude Opus 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63826,
+		maxInputTokens: 167790,
 	},
-	"gpt-4o": {
-		contextWindow: 63827,
+	"claude-sonnet-4.6": {
+		contextWindow: 197896,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o",
-		version: "gpt-4o-2024-11-20",
-		name: "GPT-4o",
+		family: "claude-sonnet-4.6",
+		version: "claude-sonnet-4.6",
+		name: "Claude Sonnet 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 197896,
 	},
-	o1: {
-		contextWindow: 19827,
-		supportsImages: false,
+	"claude-sonnet-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o1-ga",
-		version: "o1-2024-12-17",
-		name: "o1 (Preview)",
+		family: "claude-sonnet-4.5",
+		version: "claude-sonnet-4.5",
+		name: "Claude Sonnet 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 19827,
+		maxInputTokens: 167790,
 	},
-	"o3-mini": {
-		contextWindow: 63827,
-		supportsImages: false,
+	"claude-haiku-4.5": {
+		contextWindow: 135790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o3-mini",
-		version: "o3-mini-2025-01-31",
-		name: "o3-mini",
+		family: "claude-haiku-4.5",
+		version: "claude-haiku-4.5",
+		name: "Claude Haiku 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 135790,
 	},
-	"claude-3.5-sonnet": {
-		contextWindow: 81638,
+	"gpt-5.5": {
+		contextWindow: 268426,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-3.5-sonnet",
-		version: "claude-3.5-sonnet",
-		name: "Claude 3.5 Sonnet",
+		family: "gpt-5.5",
+		version: "gpt-5.5",
+		name: "GPT-5.5",
 		supportsToolCalling: true,
-		maxInputTokens: 81638,
+		maxInputTokens: 268426,
 	},
-	"claude-4-sonnet": {
-		contextWindow: 128000,
+	"gpt-5.4": {
+		contextWindow: 268424,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-sonnet-4",
-		version: "claude-sonnet-4",
-		name: "Claude Sonnet 4",
+		family: "gpt-5.4",
+		version: "gpt-5.4",
+		name: "GPT-5.4",
 		supportsToolCalling: true,
-		maxInputTokens: 111836,
+		maxInputTokens: 268424,
 	},
-	"gemini-2.0-flash-001": {
-		contextWindow: 127827,
+	"gpt-5.4-mini": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.0-flash",
-		version: "gemini-2.0-flash-001",
-		name: "Gemini 2.0 Flash",
-		supportsToolCalling: false,
-		maxInputTokens: 127827,
+		family: "gpt-5.4-mini",
+		version: "gpt-5.4-mini",
+		name: "GPT-5.4 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 271790,
 	},
-	"gemini-2.5-pro": {
-		contextWindow: 128000,
+	"gpt-5.3-codex": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.5-pro",
-		version: "gemini-2.5-pro-preview-03-25",
-		name: "Gemini 2.5 Pro (Preview)",
+		family: "gpt-5.3-codex",
+		version: "gpt-5.3-codex",
+		name: "GPT-5.3-Codex",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 271790,
 	},
-	"o4-mini": {
-		contextWindow: 128000,
+	"gpt-5-mini": {
+		contextWindow: 127790,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gpt-5-mini",
+		version: "gpt-5-mini",
+		name: "GPT-5 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 127790,
+	},
+	"gpt-4o-mini": {
+		contextWindow: 12078,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o4-mini",
-		version: "o4-mini-2025-04-16",
-		name: "o4-mini (Preview)",
+		family: "gpt-4o-mini",
+		version: "gpt-4o-mini-2024-07-18",
+		name: "GPT-4o mini",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 12078,
 	},
-	"gpt-4.1": {
-		contextWindow: 128000,
+	"gemini-3.1-pro-preview": {
+		contextWindow: 197897,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4.1",
-		version: "gpt-4.1-2025-04-14",
-		name: "GPT-4.1 (Preview)",
+		family: "gemini-3.1-pro-preview",
+		version: "gemini-3.1-pro-preview",
+		name: "Gemini 3.1 Pro (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 197897,
 	},
-	"gpt-5-mini": {
-		contextWindow: 128000,
+	"gemini-3.5-flash": {
+		contextWindow: 197895,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5-mini",
-		version: "gpt-5-mini",
-		name: "GPT-5 mini (Preview)",
+		family: "gemini-3.5-flash",
+		version: "gemini-3.5-flash",
+		name: "Gemini 3.5 Flash",
+		supportsToolCalling: true,
+		maxInputTokens: 197895,
+	},
+	"gemini-3-flash": {
+		contextWindow: 108594,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gemini-3-flash",
+		version: "gemini-3-flash-preview",
+		name: "Gemini 3 Flash (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
-	"gpt-5": {
-		contextWindow: 128000,
+	"gemini-2.5-pro": {
+		contextWindow: 108594,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5",
-		version: "gpt-5",
-		name: "GPT-5 (Preview)",
+		family: "gemini-2.5-pro",
+		version: "gemini-2.5-pro",
+		name: "Gemini 2.5 Pro",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
 } as const satisfies Record<
 	string,
diff --git a/src/api/index.ts b/src/api/index.ts
index 0c901f8e2..00201b0d2 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -107,6 +107,17 @@ export interface ApiHandler {
 
 	getModel(): { id: string; info: ModelInfo }
 
+	/**
+	 * Optional: the context window (in tokens) to use for context-management /
+	 * auto-condense decisions, when it must differ from getModel().info.contextWindow.
+	 *
+	 * Only the VS Code LM (Copilot) provider overrides this, to measure usage against the
+	 * model's static `maxInputTokens` instead of the inflated live window VS Code reports.
+	 * Other providers leave it undefined and callers fall back to getModel().info.contextWindow,
+	 * so their behavior is unchanged.
+	 */
+	getCondenseContextWindow?(): number
+
 	/**
 	 * Counts tokens for content blocks
 	 * All providers extend BaseProvider which provides a default tiktoken implementation,
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index a79a5a4bc..eb026e816 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -63,6 +63,7 @@ import * as vscode from "vscode"
 import { VsCodeLmHandler } from "../vscode-lm"
 import type { ApiHandlerOptions } from "../../../shared/api"
 import type { Anthropic } from "@anthropic-ai/sdk"
+import { openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 const mockLanguageModelChat = {
 	id: "test-model",
@@ -440,6 +441,88 @@ describe("VsCodeLmHandler", () => {
 			const model = handler.getModel()
 			expect(model.info).toBeDefined()
 		})
+
+		it("should use the full advertised maxInputTokens without an upper cap", async () => {
+			// VS Code can report a very large advertised window; getModel surfaces it as-is
+			// (Math.max(0, maxInputTokens)) rather than clamping to a smaller default.
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(936000)
+		})
+
+		it("should pass through a small maxInputTokens unchanged", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 4096 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(4096)
+		})
+
+		it("should fall back to sane defaults when maxInputTokens is not a number", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: undefined as unknown as number }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(openAiModelInfoSaneDefaults.contextWindow)
+		})
+	})
+
+	describe("getCondenseContextWindow", () => {
+		it("uses the static-table maxInputTokens for a known VS Code LM family", () => {
+			const opusHandler = new VsCodeLmHandler({
+				vsCodeLmModelSelector: { vendor: "copilot", family: "claude-opus-4.8" },
+			})
+			expect(opusHandler.getCondenseContextWindow()).toBe(vscodeLlmModels["claude-opus-4.8"].maxInputTokens)
+			opusHandler.dispose()
+		})
+
+		it("falls back to the live model context window for families not in the static table", () => {
+			// test-family is not a curated row, so the gate uses the live runtime window.
+			handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
+			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+		})
+
+		it("falls back to the live window when no family is resolvable (no client, no selector family)", () => {
+			// With neither a client nor a selector family, `family` is undefined, so the static-table
+			// lookup is skipped entirely and the gate uses getModel().info.contextWindow (fallback info).
+			const noFamilyHandler = new VsCodeLmHandler({ vsCodeLmModelSelector: { vendor: "copilot" } })
+			noFamilyHandler["client"] = null
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(noFamilyHandler.getModel().info.contextWindow)
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
+			noFamilyHandler.dispose()
+		})
+
+		it("falls back to the derived window when the static row exists but maxInputTokens is non-positive", () => {
+			// Guard sub-condition: a curated family is found but its maxInputTokens is <= 0 (corrupt/zeroed).
+			// With the selector family `claude-opus-4.8` and no live client, the zeroed static row is the one
+			// consulted, so the `maxInputTokens > 0` guard fails and the gate falls back to the derived window
+			// from getModel().info.contextWindow (sane defaults here, since there is no live client).
+			const family = "claude-opus-4.8"
+			const original = vscodeLlmModels[family].maxInputTokens
+			try {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = 0
+				const guardHandler = new VsCodeLmHandler({
+					vsCodeLmModelSelector: { vendor: "copilot", family },
+				})
+				// Leave the client unset so `family` resolves from the selector (claude-opus-4.8),
+				// forcing the zeroed static row to be read instead of a live client's family.
+				guardHandler["client"] = null
+				expect(guardHandler.getCondenseContextWindow()).toBe(guardHandler.getModel().info.contextWindow)
+				expect(guardHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
+				guardHandler.dispose()
+			} finally {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = original
+			}
+		})
 	})
 
 	describe("countTokens", () => {
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index 8fb564a9d..d730658b4 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 import OpenAI from "openai"
 
-import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types"
+import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -562,6 +562,28 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}
 
+	/**
+	 * Context window used for auto-condense / context-management decisions.
+	 *
+	 * VS Code's LM API reports `client.maxInputTokens` as Copilot's *advertised* window,
+	 * which is far larger than the realistic usable window; relying on it keeps auto-condense
+	 * from ever firing. For condense decisions we instead measure usage against the curated
+	 * static table's `maxInputTokens` — the same value the context bar uses via
+	 * `useSelectedModel` — so the gate and the gauge stay on one source of truth. The family is
+	 * taken from the live client when present, otherwise from the configured selector.
+	 * @returns the static row's `maxInputTokens` when positive, else `getModel().info.contextWindow`.
+	 */
+	getCondenseContextWindow(): number {
+		const family = this.client?.family ?? this.options.vsCodeLmModelSelector?.family
+		const staticModel = family ? vscodeLlmModels[family as keyof typeof vscodeLlmModels] : undefined
+		// Trust the static row only when it carries a usable (numeric, positive) input ceiling.
+		if (staticModel && typeof staticModel.maxInputTokens === "number" && staticModel.maxInputTokens > 0) {
+			return staticModel.maxInputTokens
+		}
+		// No family, a family missing from the table, or an unusable row: use the model's own window.
+		return this.getModel().info.contextWindow
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const client = await this.getClient()
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 9950ec536..ba0a77aac 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -810,9 +810,10 @@ describe("Context Management", () => {
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const modelInfo = createModelInfo(100000, 30000)
-			// Set tokens to be below both the allowedTokens threshold and the percentage threshold
+			// This call omits useAvailableInputForContextPercent, so usage is measured against the full
+			// window: 30000 / 100000 = 30% < 50% threshold (opt-in math: 30000 / 70000 ≈ 43%, also below).
 			const contextWindow = modelInfo.contextWindow
-			const totalTokens = 40000 // 40% of context window
+			const totalTokens = 30000
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
 				{ ...messages[messages.length - 1], content: "" },
@@ -825,7 +826,7 @@ describe("Context Management", () => {
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
+				autoCondenseContextPercent: 50, // Set threshold to 50% - usage is ~43% of available input
 				systemPrompt: "System prompt",
 				taskId,
 				profileThresholds: {},
@@ -1507,19 +1508,42 @@ describe("Context Management", () => {
 		})
 
 		it("should return false when context percent is below threshold", () => {
+			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000;
+			// 30000 / 70000 ≈ 43% < 50% threshold.
 			const result = willManageContext({
-				totalTokens: 40000,
-				contextWindow: 100000, // 40% of context window
+				totalTokens: 30000,
+				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // 50% threshold
+				autoCondenseContextPercent: 50, // 50% threshold; usage is ~43% of available input
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(false)
 		})
 
+		it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => {
+			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || DEFAULT` keeps -1,
+			// which would inflate allowedTokens (100000 * 0.9 + 1 = 90001) and skew the percentage. The
+			// guard must treat -1 like an unknown reserve (ANTHROPIC_DEFAULT_MAX_TOKENS for the
+			// allowed-tokens math, zero reserve for the available-input percentage).
+			// With autoCondenseContext disabled, only the allowedTokens path can trigger:
+			// allowedTokens = 100000 * 0.9 - 8192 = 81808; totalTokens 85000 > 81808 → true.
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
 		it("should return true when tokens exceed allowedTokens even if autoCondenseContext is false", () => {
 			// allowedTokens = contextWindow * (1 - 0.1) - reservedTokens = 100000 * 0.9 - 30000 = 60000
 			const result = willManageContext({
@@ -1581,10 +1605,12 @@ describe("Context Management", () => {
 		})
 
 		it("should include lastMessageTokens in the calculation", () => {
-			// Without lastMessageTokens: 49000 tokens = 49%
-			// With lastMessageTokens: 49000 + 2000 = 51000 tokens = 51%
+			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000.
+			// Without lastMessageTokens: 34000 / 70000 ≈ 48.6% < 50% threshold.
+			// With lastMessageTokens: (34000 + 2000) / 70000 ≈ 51.4% ≥ 50% threshold.
+			// (Against the full window both cases are < 50%, so this case requires the opt-in flag.)
 			const resultWithoutLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
@@ -1592,18 +1618,20 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithoutLastMessage).toBe(false)
 
 			const resultWithLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
 				autoCondenseContextPercent: 50, // 50% threshold
 				profileThresholds: {},
 				currentProfileId: "default",
-				lastMessageTokens: 2000, // Pushes total to 51%
+				lastMessageTokens: 2000, // Pushes usage over 50% of available input
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithLastMessage).toBe(true)
 		})
@@ -1701,4 +1729,313 @@ describe("Context Management", () => {
 			expect(result.newContextTokensAfterTruncation).toBeGreaterThan(0)
 		})
 	})
+
+	/**
+	 * Regression tests for the opt-in available-input denominator (vscode-lm). With the flag on,
+	 * the condense gate measures usage against available input space (contextWindow - reserved
+	 * output), not the raw context window. This keeps the gate in lockstep with the UI context
+	 * gauge and ensures it actually fires for vscode-lm, which reports maxTokens: -1. The default
+	 * (full-window) behavior for every other provider is covered by the sibling describe below.
+	 */
+	describe("contextPercent uses available input space (opt-in, regression)", () => {
+		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+			contextWindow,
+			supportsPromptCache: true,
+			maxTokens,
+		})
+
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext measures the percentage against available input, not the full window", () => {
+			// contextWindow 200000, reserve 64000 → available input 136000.
+			// totalTokens 100000 → 100000 / 136000 ≈ 73.5%, which clears the 70% threshold.
+			// Against the full window it would be only 50% and the gate would (wrongly) stay closed.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext stays below threshold when usage is under available input", () => {
+			// available input 136000; totalTokens 90000 → ≈ 66.2% < 70% threshold.
+			const result = willManageContext({
+				totalTokens: 90000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext treats an unlimited (-1) reserve as zero reserve for the percentage", () => {
+			// vscode-lm reports maxTokens: -1. The percentage denominator should fall back to the
+			// full window (zero reserve): 150000 / 200000 = 75% ≥ 70% threshold.
+			const result = willManageContext({
+				totalTokens: 150000,
+				contextWindow: 200000,
+				maxTokens: -1,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext falls back to 100% when the reserve is >= the window (availableInput <= 0)", () => {
+			// When maxTokens (reserve) >= contextWindow, availableInputTokens = window - reserve <= 0.
+			// The denominator guard must short-circuit contextPercent to 100 rather than divide by
+			// a non-positive number, so the gate fires regardless of the (tiny) totalTokens.
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000, // reserve > window → availableInput = -10000
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			// contextPercent === 100 >= 80 threshold → true.
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext falls back to 100% when the reserve exactly equals the window (availableInput === 0)", () => {
+			// Boundary: reserve === window → availableInputTokens === 0, which is not a positive denominator, so contextPercent is forced to 100.
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 50000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 90,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext summarizes via the 100% fallback when the reserve >= the window (availableInput <= 0)", async () => {
+			// Mirror the willManageContext edge for the manageContext path: reserve >= window forces
+			// contextPercent to 100 via the denominator guard, so summarization triggers even though
+			// totalTokens is small relative to the raw window.
+			const mockSummary = "Reserve-exceeds-window summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			// contextWindow 50000, maxTokens 60000 → availableInput = -10000 → contextPercent = 100.
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: 1,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+
+		it("manageContext summarizes based on available input space, end-to-end", async () => {
+			const mockSummary = "Available-input summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const modelInfo = createModelInfo(200000, 64000)
+			// Available input = 200000 - 64000 = 136000, so totalTokens 100000 is ≈ 73.5% of it (≥ the 70%
+			// threshold) yet only 50% of the raw window; summarization must be driven by the available-input math.
+			const totalTokens = 100000
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: totalTokens,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
+
+	/**
+	 * Scoping tests: the available-input denominator is opt-in. By default (flag omitted), the gate
+	 * divides by the FULL context window, exactly as every non-vscode-lm provider did before the
+	 * vscode-lm fix. The maxTokens: -1 reserve guard, however, remains global on the default path.
+	 */
+	describe("contextPercent denominator is opt-in (default = full window)", () => {
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext divides by the full window when the flag is omitted (default)", () => {
+			// Same inputs as the regression block: contextWindow 200000, reserve 64000, totalTokens 100000.
+			// Default (full window): 100000 / 200000 = 50% < 70% threshold → false. Under the opt-in
+			// available-input math it would be ≈ 73.5% and fire — this proves the scoping.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext fires on the same inputs when the opt-in flag is true", () => {
+			// Identical inputs, flag on: available input 136000 → 100000 / 136000 ≈ 73.5% ≥ 70% → true.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("keeps the maxTokens:-1 reserve guard on the default (full-window) path", () => {
+			// The reserve guard is global, independent of the percent denominator. With auto-condense
+			// off, only the allowedTokens path can fire: allowedTokens = 100000 * 0.9 - 8192 = 81808, and
+			// 85000 > 81808 → true. A naive `maxTokens || DEFAULT` keeps -1: 90000 + 1 = 90001 ≥ 85000 → false.
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext does NOT summarize on the default path where the opt-in math would have", async () => {
+			// contextWindow 200000, reserve 64000, totalTokens 100000. Default full-window percent is
+			// 50% < 70% threshold, and allowedTokens = 200000 * 0.9 - 64000 = 116000 > 100000, so neither
+			// condense nor truncation runs. With the opt-in flag this same case summarizes (asserted above
+			// in the regression block), proving the default path reverts to pre-fix behavior.
+			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
+
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).not.toHaveBeenCalled()
+			expect(result).toEqual({
+				messages: messagesWithSmallContent,
+				summary: "",
+				cost: 0,
+				prevContextTokens: 100000,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
 })
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index 243d7bd79..b4d89487f 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -147,6 +147,14 @@ export type WillManageContextOptions = {
 	profileThresholds: Record<string, number>
 	currentProfileId: string
 	lastMessageTokens: number
+	/**
+	 * Opt-in: measure the condense percentage against the available input space
+	 * (contextWindow - reserved output) instead of the full context window. Only providers
+	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
+	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
+	 * undefined and keep dividing by the full context window (original behavior).
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 /**
@@ -167,16 +175,19 @@ export function willManageContext({
 	profileThresholds,
 	currentProfileId,
 	lastMessageTokens,
+	useAvailableInputForContextPercent,
 }: WillManageContextOptions): boolean {
 	if (!autoCondenseContext) {
 		// When auto-condense is disabled, only truncation can occur
-		const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+		// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+		const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 		const prevContextTokens = totalTokens + lastMessageTokens
 		const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 		return prevContextTokens > allowedTokens
 	}
 
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 	const prevContextTokens = totalTokens + lastMessageTokens
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
@@ -192,7 +203,20 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	const contextPercent = (100 * prevContextTokens) / contextWindow
+	// By default, measure usage against the full context window (original behavior shared by all
+	// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
+	// input space (context window minus the reserved output budget) to match the UI context gauge,
+	// because that provider's advertised window is inflated relative to its usable input ceiling.
+	// Reserved output tokens can never hold conversation context. When the reserve is
+	// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+	let contextPercent: number
+	if (useAvailableInputForContextPercent) {
+		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+		const availableInputTokens = contextWindow - reservedForOutput
+		contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+	} else {
+		contextPercent = (100 * prevContextTokens) / contextWindow
+	}
 	return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens
 }
 
@@ -229,6 +253,14 @@ export type ContextManagementOptions = {
 	cwd?: string
 	/** Optional controller for file access validation */
 	rooIgnoreController?: RooIgnoreController
+	/**
+	 * Opt-in: measure the condense percentage against the available input space
+	 * (contextWindow - reserved output) instead of the full context window. Only providers
+	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
+	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
+	 * undefined and keep dividing by the full context window (original behavior).
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 export type ContextManagementResult = SummarizeResponse & {
@@ -262,12 +294,14 @@ export async function manageContext({
 	filesReadByRoo,
 	cwd,
 	rooIgnoreController,
+	useAvailableInputForContextPercent,
 }: ContextManagementOptions): Promise<ContextManagementResult> {
 	let error: string | undefined
 	let errorDetails: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Estimate tokens for the last message (which is always a user message)
 	const lastMessage = messages[messages.length - 1]
@@ -304,7 +338,20 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		const contextPercent = (100 * prevContextTokens) / contextWindow
+		// By default, measure usage against the full context window (original behavior shared by all
+		// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
+		// input space (context window minus the reserved output budget) to match the UI context gauge,
+		// because that provider's advertised window is inflated relative to its usable input ceiling.
+		// Reserved output tokens can never hold conversation context. When the reserve is
+		// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+		let contextPercent: number
+		if (useAvailableInputForContextPercent) {
+			const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+			const availableInputTokens = contextWindow - reservedForOutput
+			contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+		} else {
+			contextPercent = (100 * prevContextTokens) / contextWindow
+		}
 		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation({
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 50d4674fd..81a243545 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2688,9 +2688,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								if (signal.aborted) {
 									reject(new Error("Request cancelled by user"))
 								} else {
-									signal.addEventListener("abort", () => {
-										reject(new Error("Request cancelled by user"))
-									}, { once: true })
+									signal.addEventListener(
+										"abort",
+										() => {
+											reject(new Error("Request cancelled by user"))
+										},
+										{ once: true },
+									)
 								}
 							})
 							return await Promise.race([nextPromise, abortPromise])
@@ -3734,7 +3738,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			settings: this.apiConfiguration,
 		})
 
-		const contextWindow = modelInfo.contextWindow
+		// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+		// inflated live window, so context management runs in line with the context bar. When the
+		// hook is absent (every other provider) or yields undefined, use modelInfo.contextWindow.
+		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
+
+		// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
+		// available-input condense denominator to that provider; all others use the full window.
+		const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)
@@ -3803,6 +3814,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				currentProfileId,
 				metadata,
 				environmentDetails,
+				useAvailableInputForContextPercent,
 			})
 
 			if (truncateResult.messages !== this.apiConversationHistory) {
@@ -3930,7 +3942,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				settings: this.apiConfiguration,
 			})
 
-			const contextWindow = modelInfo.contextWindow
+			// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+			// inflated live window, so context management runs in line with the context bar. When the
+			// hook is absent (every other provider) or yields undefined, use modelInfo.contextWindow.
+			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
+
+			// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
+			// available-input condense denominator to that provider; all others use the full window.
+			const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 			// Get the current profile ID using the helper method
 			const currentProfileId = this.getCurrentProfileId(state)
@@ -3955,6 +3974,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				profileThresholds,
 				currentProfileId,
 				lastMessageTokens,
+				useAvailableInputForContextPercent,
 			})
 
 			// Send condenseTaskContextStarted BEFORE manageContext to show in-progress indicator
@@ -4037,6 +4057,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					filesReadByRoo: contextMgmtFilesReadByRoo,
 					cwd: this.cwd,
 					rooIgnoreController: this.rooIgnoreController,
+					useAvailableInputForContextPercent,
 				})
 				if (truncateResult.messages !== this.apiConversationHistory) {
 					await this.overwriteApiConversationHistory(truncateResult.messages)
@@ -4191,10 +4212,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		const iterator = stream[Symbol.asyncIterator]()
 
 		// Set up abort handling - when the signal is aborted, clean up the controller reference
-		abortSignal.addEventListener("abort", () => {
-			console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
-			this.currentRequestAbortController = undefined
-		}, { once: true })
+		abortSignal.addEventListener(
+			"abort",
+			() => {
+				console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
+				this.currentRequestAbortController = undefined
+			},
+			{ once: true },
+		)
 
 		try {
 			// Awaiting first chunk to see if it will throw an error.
@@ -4206,9 +4231,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				if (abortSignal.aborted) {
 					reject(new Error("Request cancelled by user"))
 				} else {
-					abortSignal.addEventListener("abort", () => {
-						reject(new Error("Request cancelled by user"))
-					}, { once: true })
+					abortSignal.addEventListener(
+						"abort",
+						() => {
+							reject(new Error("Request cancelled by user"))
+						},
+						{ once: true },
+					)
 				}
 			})
 
diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx
index 4ddf5ef35..927d3d057 100644
--- a/webview-ui/src/components/chat/TaskHeader.tsx
+++ b/webview-ui/src/components/chat/TaskHeader.tsx
@@ -76,7 +76,8 @@ const TaskHeader = ({
 				: 0,
 		[model, modelId, apiConfiguration],
 	)
-	const reservedForOutput = maxTokens || 0
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
 
 	const condenseButton = (
 		<LucideIconButton
diff --git a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
index 41aa452ab..c84538263 100644
--- a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
@@ -267,5 +267,19 @@ describe("TaskHeader", () => {
 			// Should show 0% when available input space is 0
 			expect(screen.getByText("0%")).toBeInTheDocument()
 		})
+
+		it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => {
+			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || 0` keeps -1, which
+			// widens the available input space to contextWindow + 1. The window is kept tiny so that
+			// single extra token changes the displayed integer percentage and the guard is observable:
+			// guarded: contextTokens 1 / (4 - 0) = 25%; naive: 1 / (4 + 1) = 20%.
+			// (With contextWindow 1000 and 250 tokens, 250 / 1001 ≈ 24.98% could still display as 25%.)
+			mockModelInfo = { contextWindow: 4, maxTokens: -1 }
+			mockMaxOutputTokens = -1
+
+			renderTaskHeader({ contextTokens: 1 })
+
+			expect(screen.getByText("25%")).toBeInTheDocument()
+		})
 	})
 })
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 0dc42129c..3ffe85e14 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -14,6 +14,8 @@ import {
 	minimaxDefaultModelId,
 	minimaxModels,
 	openRouterDefaultModelId,
+	vscodeLlmModels,
+	vscodeLlmDefaultModelId,
 } from "@roo-code/types"
 
 import { useSelectedModel } from "../useSelectedModel"
@@ -772,4 +774,77 @@ describe("useSelectedModel", () => {
 			expect(result.current.info).toEqual(minimaxModels["MiniMax-M2.7"])
 		})
 	})
+
+	describe("vscode-lm provider", () => {
+		beforeEach(() => {
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			mockUseOpenRouterModelProviders.mockReturnValue({
+				data: {},
+				isLoading: false,
+				isError: false,
+			} as any)
+		})
+
+		it("resolves a listed family's contextWindow to its maxInputTokens", () => {
+			const family = vscodeLlmDefaultModelId
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			// The hook overrides contextWindow with the table's maxInputTokens so the bar and the condense gate agree.
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("pins a divergent family's contextWindow to maxInputTokens, not its advertised window", () => {
+			// claude-opus-4.8 is the row where contextWindow (679560) and maxInputTokens (197897) DIFFER.
+			// The hook must surface maxInputTokens so the bar matches the condense gate; a field swap to
+			// the advertised contextWindow would be caught here (unlike the default model where they match).
+			const family = "claude-opus-4.8"
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens) // 197897
+			expect(result.current.info?.contextWindow).not.toBe(vscodeLlmModels[family].contextWindow) // NOT 679560
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("falls back to the default model's window for an unlisted family (NOT 128000)", () => {
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family: "totally-unknown-family" },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			// On a family miss we must NOT fall back to openAiModelInfoSaneDefaults' 128000 window,
+			// which would diverge from the gate. Instead, use the default model's maxInputTokens.
+			expect(result.current.info?.contextWindow).not.toBe(128000)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+	})
 })
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index d3ebb6c0d..a5940ba7d 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -310,8 +310,22 @@ function getSelectedModel({
 				? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}`
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
-			const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels]
-			return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images.
+			// On a family miss, fall back to the default model entry instead of openAiModelInfoSaneDefaults,
+			// whose 128K contextWindow would diverge from the gate and make the bar read >100% while
+			// auto-condense never fires (the gate uses the live window).
+			const listedModel =
+				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
+			// Set contextWindow = maxInputTokens so the UI bar matches what the condense gate uses for
+			// vscode-lm. The gate's primary window comes from getCondenseContextWindow() (which returns the
+			// static-table maxInputTokens); getModel().info.contextWindow is only the fallback. Sharing
+			// maxInputTokens keeps the bar and the gate on a single source of truth.
+			const info: ModelInfo = {
+				...openAiModelInfoSaneDefaults,
+				...listedModel,
+				contextWindow: listedModel.maxInputTokens,
+				supportsImages: false, // VSCode LM API currently doesn't support images.
+			}
+			return { id, info }
 		}
 		case "sambanova": {
 			const id = apiConfiguration.apiModelId ?? defaultModelId