From 4203a2d181519481311ca91f9ab90da566b025f7 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Tue, 16 Jun 2026 02:57:58 -0700
Subject: [PATCH 1/9] fix: LiteLLM provider model desync after Sync Models or
 settings navigation

- Invalidate React Query router-models cache on successful sync in LiteLLM.tsx
  so useSelectedModel picks up the refreshed list (mirrors Poe.tsx fix)
- Preserve configured litellm model ID when model list is empty/loading in
  useSelectedModel.ts instead of silently falling back to hardcoded default
- Pass litellm credentials in the debounced requestRouterModels message in
  ApiOptions.tsx to prevent fetches with stale config from clearing the model list
---
 .changeset/fix-litellm-model-desync.md        |  26 ++++
 src/api/providers/fetchers/modelCache.ts      | 127 +++++++++++++-----
 src/api/providers/router-provider.ts          |  11 +-
 src/core/webview/webviewMessageHandler.ts     |   8 +-
 .../src/components/settings/ApiOptions.tsx    |  10 +-
 .../components/settings/providers/LiteLLM.tsx |   7 +-
 .../hooks/__tests__/useSelectedModel.spec.ts  |  29 +++-
 .../components/ui/hooks/useSelectedModel.ts   |  10 +-
 8 files changed, 186 insertions(+), 42 deletions(-)
 create mode 100644 .changeset/fix-litellm-model-desync.md

diff --git a/.changeset/fix-litellm-model-desync.md b/.changeset/fix-litellm-model-desync.md
new file mode 100644
index 0000000000..f411e21127
--- /dev/null
+++ b/.changeset/fix-litellm-model-desync.md
@@ -0,0 +1,26 @@
+---
+"zoo-code": patch
+---
+
+Fix LiteLLM provider cache key collision, credential priority, and model-selection fallback to non-existent default.
+
+Two bugs are addressed:
+
+1. **Cache key collision**: All URL-scoped providers (LiteLLM, Ollama, LM Studio, Poe, DeepSeek,
+   Requesty) previously shared one cache entry keyed only on the provider name. Switching between
+   profiles backed by different servers silently served the wrong model list and the stale list
+   persisted across VS Code restarts via the disk cache. Fixed with a compound cache key:
+   URL-scoped providers use `provider:baseUrl`; key-scoped providers (LiteLLM, Poe, Requesty)
+   additionally include a short sha256 hash of the API key (`provider:baseUrl:sha256(apiKey)`) so that
+   two different API keys on the same server never share a cache entry (relevant when the server
+   enforces per-key model allowlists). The `RouterProvider.getModel()` cold-start fallback is also
+   corrected to pass the full options so it resolves the same compound key.
+
+2. **Silent fallback to hardcoded default**: When the LiteLLM model list was empty (due to the
+   collision above, a failed sync, or a transient error), `useSelectedModel` reset the configured
+   model ID to `claude-3-7-sonnet-20250219` -- a model that typically does not exist on user
+   LiteLLM servers. Four sub-fixes: preserve the configured model ID when the list is empty;
+   invalidate the React Query router-models cache after a successful "Sync Models" click; pass the
+   current LiteLLM credentials in the debounced `requestRouterModels` message; and correct the
+   credential priority in `webviewMessageHandler.ts` so that message values (current unsaved field
+   state) take precedence over stale saved config, matching the pattern already used for DeepSeek.
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 404a60cd85..b252a4f5b8 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -1,6 +1,7 @@
 import * as path from "path"
 import fs from "fs/promises"
 import * as fsSync from "fs"
+import { createHash } from "crypto"
 
 import NodeCache from "node-cache"
 import { z } from "zod"
@@ -34,9 +35,10 @@ const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 // Zod schema for validating ModelRecord structure from disk cache
 const modelRecordSchema = z.record(z.string(), modelInfoSchema)
 
-// Track in-flight refresh requests to prevent concurrent API calls for the same provider
-// This prevents race conditions where multiple calls might overwrite each other's results
-const inFlightRefresh = new Map<RouterName, Promise<ModelRecord>>()
+// Track in-flight refresh requests to prevent concurrent API calls for the same provider+url.
+// Keyed on the compound cache key (see getCacheKey) so that two different LiteLLM servers never
+// deduplicate each other's in-flight refreshes.
+const inFlightRefresh = new Map<string, Promise<ModelRecord>>()
 
 // Providers whose model lists are scoped to the signed-in user (e.g. per-account
 // allowlists or org policies). For these we MUST NOT cache results on disk or
@@ -44,18 +46,75 @@ const inFlightRefresh = new Map<RouterName, Promise<ModelRecord>>()
 // list to the next user, and stale data could mask backend allowlist updates.
 const AUTH_SCOPED_PROVIDERS: ReadonlySet<RouterName> = new Set(["zoo-gateway"])
 
+// Providers whose model list is determined by the server URL, not just by the provider name.
+// Each unique baseUrl must be cached independently so that switching endpoints never serves
+// stale results from a previously-cached server.
+const URL_SCOPED_PROVIDERS: ReadonlySet<RouterName> = new Set([
+	"litellm",
+	"poe",
+	"deepseek",
+	"ollama",
+	"lmstudio",
+	"requesty",
+])
+
+// Providers where the API key itself determines which models are visible (e.g. per-key
+// allowlists on a shared proxy). For these the cache key also includes a short hash of
+// the API key so that two different keys on the same server never share a cache entry.
+const KEY_SCOPED_PROVIDERS: ReadonlySet<RouterName> = new Set([
+	"litellm", // Per-key model allowlists are a first-class LiteLLM proxy feature
+	"poe", // Per-account model availability
+	"requesty", // Per-account custom model policies
+])
+
 function isAuthScopedProvider(provider: RouterName): boolean {
 	return AUTH_SCOPED_PROVIDERS.has(provider)
 }
 
-async function writeModels(router: RouterName, data: ModelRecord) {
-	const filename = `${router}_models.json`
+/**
+ * Build a cache key that is unique per provider+server+key combination.
+ *
+ * - URL-scoped providers include the normalized baseUrl so that two different servers
+ *   of the same provider type never share a cache entry.
+ * - Key-scoped providers additionally fold in a short sha256 hash of the API key so that
+ *   two different API keys on the same server never share a cache entry (relevant when
+ *   the server enforces per-key model allowlists, e.g. LiteLLM, Poe, Requesty).
+ */
+function getCacheKey(options: GetModelsOptions): string {
+	const { provider } = options
+	const isUrlScoped = URL_SCOPED_PROVIDERS.has(provider as RouterName)
+	const isKeyScoped = KEY_SCOPED_PROVIDERS.has(provider as RouterName)
+
+	if (isUrlScoped && options.baseUrl) {
+		// Strip trailing slashes so "http://host:4000/" and "http://host:4000" map to the same key.
+		const normalizedUrl = options.baseUrl.replace(/\/+$/, "")
+		if (isKeyScoped && options.apiKey) {
+			// Short (16-char) sha256 prefix -- enough to make collisions effectively impossible
+			// while keeping filenames readable. We do not need the full digest here.
+			const keyHash = createHash("sha256").update(options.apiKey).digest("hex").slice(0, 16)
+			return `${provider}:${normalizedUrl}:${keyHash}`
+		}
+		return `${provider}:${normalizedUrl}`
+	}
+	return provider
+}
+
+/**
+ * Convert a cache key to a filesystem-safe filename component.
+ * Replaces characters that are illegal or awkward in filenames with underscores.
+ */
+function cacheKeyToFilename(cacheKey: string): string {
+	return cacheKey.replace(/[:/\\?#*<>|"\s]+/g, "_")
+}
+
+async function writeModels(cacheKey: string, data: ModelRecord) {
+	const filename = `${cacheKeyToFilename(cacheKey)}_models.json`
 	const cacheDir = await getCacheDirectoryPath(ContextProxy.instance.globalStorageUri.fsPath)
 	await safeWriteJson(path.join(cacheDir, filename), data)
 }
 
-async function readModels(router: RouterName): Promise<ModelRecord | undefined> {
-	const filename = `${router}_models.json`
+async function readModels(cacheKey: string): Promise<ModelRecord | undefined> {
+	const filename = `${cacheKeyToFilename(cacheKey)}_models.json`
 	const cacheDir = await getCacheDirectoryPath(ContextProxy.instance.globalStorageUri.fsPath)
 	const filePath = path.join(cacheDir, filename)
 	const exists = await fileExistsAtPath(filePath)
@@ -133,10 +192,11 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
  */
 export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 	const { provider } = options
+	const cacheKey = getCacheKey(options)
 
 	const shouldSkipCache = isAuthScopedProvider(provider)
 
-	let models = shouldSkipCache ? undefined : getModelsFromCache(provider)
+	let models = shouldSkipCache ? undefined : getModelsFromCache(options)
 
 	if (models) {
 		return models
@@ -149,10 +209,10 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord>
 		// Only cache non-empty results so a failed API response doesn't get persisted
 		// as if the provider had no models. Auth-scoped providers skip caching entirely.
 		if (modelCount > 0 && !shouldSkipCache) {
-			memoryCache.set(provider, models)
+			memoryCache.set(cacheKey, models)
 
-			await writeModels(provider, models).catch((err) =>
-				console.error(`[MODEL_CACHE] Error writing ${provider} models to file cache:`, err),
+			await writeModels(cacheKey, models).catch((err) =>
+				console.error(`[MODEL_CACHE] Error writing ${cacheKey} models to file cache:`, err),
 			)
 		} else if (modelCount === 0) {
 			TelemetryService.instance.captureEvent(TelemetryEventName.MODEL_CACHE_EMPTY_RESPONSE, {
@@ -182,17 +242,18 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord>
  */
 export const refreshModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 	const { provider } = options
+	const cacheKey = getCacheKey(options)
 
 	const shouldSkipCache = isAuthScopedProvider(provider)
 
-	// Check if there's already an in-flight refresh for this provider.
+	// Check if there's already an in-flight refresh for this provider+url combination.
 	// This prevents race conditions where multiple concurrent refreshes might
 	// overwrite each other's results. Skip de-duplication for auth-scoped
 	// providers because two concurrent calls may carry different tokens
 	// (e.g., after a sign-out/sign-in within the same session) and we must
 	// not return the first caller's results to the second caller.
 	if (!shouldSkipCache) {
-		const existingRequest = inFlightRefresh.get(provider)
+		const existingRequest = inFlightRefresh.get(cacheKey)
 		if (existingRequest) {
 			return existingRequest
 		}
@@ -206,7 +267,7 @@ export const refreshModels = async (options: GetModelsOptions): Promise<ModelRec
 			const modelCount = Object.keys(models).length
 
 			// Get existing cached data for comparison
-			const existingCache = shouldSkipCache ? undefined : getModelsFromCache(provider)
+			const existingCache = shouldSkipCache ? undefined : getModelsFromCache(options)
 			const existingCount = existingCache ? Object.keys(existingCache).length : 0
 
 			if (modelCount === 0) {
@@ -224,10 +285,10 @@ export const refreshModels = async (options: GetModelsOptions): Promise<ModelRec
 			}
 
 			if (!shouldSkipCache) {
-				memoryCache.set(provider, models)
+				memoryCache.set(cacheKey, models)
 
-				await writeModels(provider, models).catch((err) =>
-					console.error(`[refreshModels] Error writing ${provider} models to disk:`, err),
+				await writeModels(cacheKey, models).catch((err) =>
+					console.error(`[refreshModels] Error writing ${cacheKey} models to disk:`, err),
 				)
 			}
 
@@ -235,23 +296,23 @@ export const refreshModels = async (options: GetModelsOptions): Promise<ModelRec
 		} catch (error) {
 			// Log the error for debugging, then return existing cache if available (graceful degradation).
 			// For auth-scoped providers (zoo-gateway) we MUST NOT return cached models from a prior
-			// session, since they could belong to a different user — return empty instead.
-			console.error(`[refreshModels] Failed to refresh ${provider} models:`, error)
+			// session, since they could belong to a different user -- return empty instead.
+			console.error(`[refreshModels] Failed to refresh ${cacheKey} models:`, error)
 			if (shouldSkipCache) {
 				return {}
 			}
-			return getModelsFromCache(provider) || {}
+			return getModelsFromCache(options) || {}
 		} finally {
 			// Always clean up the in-flight tracking
 			if (!shouldSkipCache) {
-				inFlightRefresh.delete(provider)
+				inFlightRefresh.delete(cacheKey)
 			}
 		}
 	})()
 
 	// Track the in-flight request (auth-scoped providers are excluded; see above).
 	if (!shouldSkipCache) {
-		inFlightRefresh.set(provider, refreshPromise)
+		inFlightRefresh.set(cacheKey, refreshPromise)
 	}
 
 	return refreshPromise
@@ -290,7 +351,6 @@ export async function initializeModelCacheRefresh(): Promise<void> {
  * @param refresh - If true, immediately fetch fresh data from API
  */
 export const flushModels = async (options: GetModelsOptions, refresh: boolean = false): Promise<void> => {
-	const { provider } = options
 	if (refresh) {
 		// Don't delete memory cache - let refreshModels atomically replace it
 		// This prevents a race condition where getModels() might be called
@@ -298,8 +358,10 @@ export const flushModels = async (options: GetModelsOptions, refresh: boolean =
 		// Await the refresh to ensure the cache is updated before returning
 		await refreshModels(options)
 	} else {
-		// Only delete memory cache when not refreshing
-		memoryCache.del(provider)
+		// Only delete memory cache when not refreshing. Use the compound cache key so that
+		// URL-scoped providers (litellm, poe, etc.) actually evict the per-server entry rather
+		// than a bare provider-name entry that was never written.
+		memoryCache.del(getCacheKey(options))
 	}
 }
 
@@ -311,9 +373,12 @@ export const flushModels = async (options: GetModelsOptions, refresh: boolean =
  * @param provider - The provider to get models for.
  * @returns Models from memory cache, disk cache, or undefined if not cached.
  */
-export function getModelsFromCache(provider: ProviderName): ModelRecord | undefined {
+export function getModelsFromCache(
+	options: GetModelsOptions | ProviderName,
+): ModelRecord | undefined {
+	const cacheKey = typeof options === "string" ? options : getCacheKey(options)
 	// Check memory cache first (fast)
-	const memoryModels = memoryCache.get<ModelRecord>(provider)
+	const memoryModels = memoryCache.get<ModelRecord>(cacheKey)
 	if (memoryModels) {
 		return memoryModels
 	}
@@ -321,7 +386,7 @@ export function getModelsFromCache(provider: ProviderName): ModelRecord | undefi
 	// Memory cache miss - try to load from disk synchronously
 	// This is acceptable because it only happens on cold start or after cache expiry
 	try {
-		const filename = `${provider}_models.json`
+		const filename = `${cacheKeyToFilename(cacheKey)}_models.json`
 		const cacheDir = getCacheDirectoryPathSync()
 		if (!cacheDir) {
 			return undefined
@@ -339,19 +404,19 @@ export function getModelsFromCache(provider: ProviderName): ModelRecord | undefi
 			const validation = modelRecordSchema.safeParse(models)
 			if (!validation.success) {
 				console.error(
-					`[MODEL_CACHE] Invalid disk cache data structure for ${provider}:`,
+					`[MODEL_CACHE] Invalid disk cache data structure for ${cacheKey}:`,
 					validation.error.format(),
 				)
 				return undefined
 			}
 
 			// Populate memory cache for future fast access
-			memoryCache.set(provider, validation.data)
+			memoryCache.set(cacheKey, validation.data)
 
 			return validation.data
 		}
 	} catch (error) {
-		console.error(`[MODEL_CACHE] Error loading ${provider} models from disk:`, error)
+		console.error(`[MODEL_CACHE] Error loading ${cacheKey} models from disk:`, error)
 	}
 
 	return undefined
diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts
index 20419bb45a..84c5774bb6 100644
--- a/src/api/providers/router-provider.ts
+++ b/src/api/providers/router-provider.ts
@@ -69,9 +69,14 @@ export abstract class RouterProvider extends BaseProvider {
 			return { id, info: this.models[id] }
 		}
 
-		// Fall back to global cache (synchronous disk/memory cache)
-		// This ensures models are available before fetchModel() is called
-		const cachedModels = getModelsFromCache(this.name)
+		// Fall back to global cache (synchronous disk/memory cache).
+		// Pass the full options so URL-scoped providers (litellm, ollama, etc.)
+		// resolve the same compound cache key that fetchModel() wrote under.
+		const cachedModels = getModelsFromCache({
+			provider: this.name,
+			baseUrl: this.client.baseURL,
+			apiKey: this.client.apiKey,
+		})
 		if (cachedModels?.[id]) {
 			// Also populate instance models for future calls
 			this.models = cachedModels
diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index 5a827d5126..0eaf1126dc 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -1074,9 +1074,11 @@ export const webviewMessageHandler = async (
 				},
 			]
 
-			// LiteLLM is conditional on baseUrl+apiKey
-			const litellmApiKey = apiConfiguration.litellmApiKey || message?.values?.litellmApiKey
-			const litellmBaseUrl = apiConfiguration.litellmBaseUrl || message?.values?.litellmBaseUrl
+			// LiteLLM is conditional on baseUrl+apiKey.
+			// Prefer explicit values from message (current unsaved field state) over saved config,
+			// matching the pattern used for DeepSeek and other credential-carrying providers.
+			const litellmApiKey = message?.values?.litellmApiKey ?? apiConfiguration.litellmApiKey
+			const litellmBaseUrl = message?.values?.litellmBaseUrl ?? apiConfiguration.litellmBaseUrl
 
 			if (litellmApiKey && litellmBaseUrl) {
 				// If explicit credentials are provided in message.values (from Refresh Models button),
diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx
index 70617a1ee6..d54e0b634e 100644
--- a/webview-ui/src/components/settings/ApiOptions.tsx
+++ b/webview-ui/src/components/settings/ApiOptions.tsx
@@ -222,7 +222,15 @@ const ApiOptions = ({
 				requestLmStudioModels(apiConfiguration?.lmStudioBaseUrl)
 			} else if (selectedProvider === "vscode-lm") {
 				vscode.postMessage({ type: "requestVsCodeLmModels" })
-			} else if (selectedProvider === "litellm" || selectedProvider === "poe") {
+			} else if (selectedProvider === "litellm") {
+				vscode.postMessage({
+					type: "requestRouterModels",
+					values: {
+						litellmApiKey: apiConfiguration?.litellmApiKey,
+						litellmBaseUrl: apiConfiguration?.litellmBaseUrl,
+					},
+				})
+			} else if (selectedProvider === "poe") {
 				vscode.postMessage({ type: "requestRouterModels" })
 			}
 		},
diff --git a/webview-ui/src/components/settings/providers/LiteLLM.tsx b/webview-ui/src/components/settings/providers/LiteLLM.tsx
index 38ae1f3a96..0063afaa17 100644
--- a/webview-ui/src/components/settings/providers/LiteLLM.tsx
+++ b/webview-ui/src/components/settings/providers/LiteLLM.tsx
@@ -1,5 +1,6 @@
 import { useCallback, useState, useEffect, useRef } from "react"
 import { VSCodeTextField, VSCodeCheckbox } from "@vscode/webview-ui-toolkit/react"
+import { useQueryClient } from "@tanstack/react-query"
 
 import {
 	type ProviderSettings,
@@ -34,6 +35,7 @@ export const LiteLLM = ({
 	simplifySettings,
 }: LiteLLMProps) => {
 	const { t } = useAppTranslation()
+	const queryClient = useQueryClient()
 	const { routerModels } = useExtensionState()
 	const [refreshStatus, setRefreshStatus] = useState<"idle" | "loading" | "success" | "error">("idle")
 	const [refreshError, setRefreshError] = useState<string | undefined>()
@@ -55,6 +57,9 @@ export const LiteLLM = ({
 				if (refreshStatus === "loading") {
 					if (!litellmErrorJustReceived.current) {
 						setRefreshStatus("success")
+						// Invalidate the react-query router models cache so
+						// useSelectedModel picks up the refreshed list.
+						queryClient.invalidateQueries({ queryKey: ["routerModels"] })
 					}
 					// If litellmErrorJustReceived.current is true, status is already (or will be) "error".
 				}
@@ -65,7 +70,7 @@ export const LiteLLM = ({
 		return () => {
 			window.removeEventListener("message", handleMessage)
 		}
-	}, [refreshStatus, refreshError, setRefreshStatus, setRefreshError])
+	}, [refreshStatus, queryClient])
 
 	const handleInputChange = useCallback(
 		<K extends keyof ProviderSettings, E>(
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 0dc42129c0..34322e4e96 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -571,12 +571,37 @@ describe("useSelectedModel", () => {
 			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
 
 			expect(result.current.provider).toBe("litellm")
-			// Should fall back to default model ID since "some-model" doesn't exist in empty litellm models
-			expect(result.current.id).toBe("claude-3-7-sonnet-20250219")
+			// Should preserve configured model ID since "some-model" doesn't exist in empty litellm models
+			expect(result.current.id).toBe("some-model")
 			// Should use litellmDefaultModelInfo as fallback
 			expect(result.current.info).toEqual(litellmDefaultModelInfo)
 		})
 
+		it("should return an empty model ID when the list is empty and no model is configured", () => {
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "litellm",
+				// litellmModelId intentionally omitted
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("litellm")
+			// LiteLLM has no inherent default; with nothing configured the ID is empty rather than a phantom model
+			expect(result.current.id).toBe("")
+			expect(result.current.info).toEqual(litellmDefaultModelInfo)
+		})
+
 		it("should use litellmDefaultModelInfo when selected model not found in routerModels", () => {
 			mockUseRouterModels.mockReturnValue({
 				data: {
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index d3ebb6c0dd..7f60f5f0a0 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -167,7 +167,15 @@ function getSelectedModel({
 			return { id, info: routerInfo }
 		}
 		case "litellm": {
-			const id = getValidatedModelId(apiConfiguration.litellmModelId, routerModels.litellm, defaultModelId)
+			// When the model list is empty (not yet loaded or still loading),
+			// preserve the configured model ID. LiteLLM is a proxy with no inherent
+			// default model, so we never substitute a hardcoded default here -- when
+			// nothing is configured we return an empty ID so the picker shows "no
+			// selection" rather than a phantom model that does not exist on the server.
+			const hasModels = routerModels.litellm && Object.keys(routerModels.litellm).length > 0
+			const id = hasModels
+				? getValidatedModelId(apiConfiguration.litellmModelId, routerModels.litellm, defaultModelId)
+				: (apiConfiguration.litellmModelId ?? "")
 			const routerInfo = routerModels.litellm?.[id]
 			return { id, info: routerInfo ?? litellmDefaultModelInfo }
 		}

From fd44f2af452624fb0e0ee0418e085837a399bd40 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Thu, 18 Jun 2026 13:30:09 -0700
Subject: [PATCH 2/9] fix: address CodeRabbit review comments on litellm model
 desync

- Refactor getCacheKey to compute URL and key components independently,
  preventing key-scoped providers on the default server from collapsing
  to the same cache entry
- Replace character-substitution in cacheKeyToFilename with SHA-256 hash
  for collision-free filesystem-safe filenames
- Block AUTH_SCOPED_PROVIDERS (zoo-gateway) in getModelsFromCache to
  prevent stale user-specific model lists from leaking across sessions
- Wrap requestRouterModels switch case body in braces to satisfy Biome
  noSwitchDeclarations lint rule
- Preserve configured modelId in RouterProvider.getModel() last-resort
  fallback instead of silently defaulting to hardcoded defaultModelId
- Add codeql[js/insufficient-password-hash] suppressions on SHA-256
  cache key discriminator calls (false positive: not password hashing)
---
 src/api/providers/fetchers/modelCache.ts  | 51 ++++++++++++++++-------
 src/api/providers/router-provider.ts      |  7 +++-
 src/core/webview/webviewMessageHandler.ts |  3 +-
 3 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index b252a4f5b8..c953232c59 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -36,7 +36,7 @@ const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
 const modelRecordSchema = z.record(z.string(), modelInfoSchema)
 
 // Track in-flight refresh requests to prevent concurrent API calls for the same provider+url.
-// Keyed on the compound cache key (see getCacheKey) so that two different LiteLLM servers never
+// Keyed on the compound cache key (see getCacheKey) so that two different URL-scoped servers never
 // deduplicate each other's in-flight refreshes.
 const inFlightRefresh = new Map<string, Promise<ModelRecord>>()
 
@@ -59,7 +59,7 @@ const URL_SCOPED_PROVIDERS: ReadonlySet<RouterName> = new Set([
 ])
 
 // Providers where the API key itself determines which models are visible (e.g. per-key
-// allowlists on a shared proxy). For these the cache key also includes a short hash of
+// allowlists). For these the cache key also includes a short hash of
 // the API key so that two different keys on the same server never share a cache entry.
 const KEY_SCOPED_PROVIDERS: ReadonlySet<RouterName> = new Set([
 	"litellm", // Per-key model allowlists are a first-class LiteLLM proxy feature
@@ -85,26 +85,39 @@ function getCacheKey(options: GetModelsOptions): string {
 	const isUrlScoped = URL_SCOPED_PROVIDERS.has(provider as RouterName)
 	const isKeyScoped = KEY_SCOPED_PROVIDERS.has(provider as RouterName)
 
-	if (isUrlScoped && options.baseUrl) {
-		// Strip trailing slashes so "http://host:4000/" and "http://host:4000" map to the same key.
-		const normalizedUrl = options.baseUrl.replace(/\/+$/, "")
-		if (isKeyScoped && options.apiKey) {
-			// Short (16-char) sha256 prefix -- enough to make collisions effectively impossible
-			// while keeping filenames readable. We do not need the full digest here.
-			const keyHash = createHash("sha256").update(options.apiKey).digest("hex").slice(0, 16)
-			return `${provider}:${normalizedUrl}:${keyHash}`
-		}
-		return `${provider}:${normalizedUrl}`
-	}
+	// Build URL and key components independently so that key-scoped providers
+	// without a custom baseUrl still get a per-key cache entry (otherwise two
+	// different keys on the default server would collapse to the same entry).
+	// Strip trailing slashes so "http://host:4000/" and "http://host:4000" map to the same key.
+	const urlPart = isUrlScoped && options.baseUrl ? options.baseUrl.replace(/\/+$/, "") : undefined
+	// Short (16-char) sha256 prefix -- enough to make collisions effectively impossible.
+	// Not a password hash -- SHA-256 is used here purely as a cache key discriminator to
+	// distinguish between different API keys on the same server. It is never used for
+	// authentication or stored as a credential.
+	// codeql[js/insufficient-password-hash]
+	const keyPart =
+		isKeyScoped && options.apiKey
+			? createHash("sha256").update(options.apiKey).digest("hex").slice(0, 16)
+			: undefined
+
+	if (urlPart && keyPart) return `${provider}:${urlPart}:${keyPart}`
+	if (urlPart) return `${provider}:${urlPart}`
+	if (keyPart) return `${provider}:${keyPart}`
 	return provider
 }
 
 /**
  * Convert a cache key to a filesystem-safe filename component.
- * Replaces characters that are illegal or awkward in filenames with underscores.
+ * Hashes the full key to guarantee uniqueness while preserving a readable
+ * provider prefix at the start of the filename.
  */
 function cacheKeyToFilename(cacheKey: string): string {
-	return cacheKey.replace(/[:/\\?#*<>|"\s]+/g, "_")
+	const prefix = cacheKey.split(":")[0] // provider name -- always filesystem-safe
+	// Not a password hash -- SHA-256 is used here to produce a collision-free filename
+	// component from the compound cache key. It is never used for authentication.
+	// codeql[js/insufficient-password-hash]
+	const hash = createHash("sha256").update(cacheKey).digest("hex").slice(0, 16)
+	return `${prefix}_${hash}`
 }
 
 async function writeModels(cacheKey: string, data: ModelRecord) {
@@ -376,6 +389,14 @@ export const flushModels = async (options: GetModelsOptions, refresh: boolean =
 export function getModelsFromCache(
 	options: GetModelsOptions | ProviderName,
 ): ModelRecord | undefined {
+	// Auth-scoped providers (e.g. zoo-gateway) must never be served from cache --
+	// their model lists are user-specific and a stale file left over from a previous
+	// session could leak another user's list. Mirror the guards in getModels/refreshModels.
+	const providerName = typeof options === "string" ? options : options.provider
+	if (isAuthScopedProvider(providerName as RouterName)) {
+		return undefined
+	}
+
 	const cacheKey = typeof options === "string" ? options : getCacheKey(options)
 	// Check memory cache first (fast)
 	const memoryModels = memoryCache.get<ModelRecord>(cacheKey)
diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts
index 84c5774bb6..b0315bc416 100644
--- a/src/api/providers/router-provider.ts
+++ b/src/api/providers/router-provider.ts
@@ -83,8 +83,11 @@ export abstract class RouterProvider extends BaseProvider {
 			return { id, info: cachedModels[id] }
 		}
 
-		// Last resort: return default model
-		return { id: this.defaultModelId, info: this.defaultModelInfo }
+		// Last resort: preserve the configured model ID (falling back to the default
+		// only when none is configured) so an as-yet-unfetched model isn't silently
+		// swapped for the hardcoded default. info still comes from defaults since we
+		// have no fetched or cached metadata for the configured model at this point.
+		return { id, info: this.defaultModelInfo }
 	}
 
 	protected supportsTemperature(modelId: string): boolean {
diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index 0eaf1126dc..79a3ced3a2 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -1006,7 +1006,7 @@ export const webviewMessageHandler = async (
 			// For providers that need credentials, use their specific handlers
 			await flushModels({ provider: routerNameFlush } as GetModelsOptions, true)
 			break
-		case "requestRouterModels":
+		case "requestRouterModels": {
 			const { apiConfiguration } = await provider.getState()
 
 			// Optional single provider filter from webview
@@ -1187,6 +1187,7 @@ export const webviewMessageHandler = async (
 				values: providerFilter ? { provider: requestedProvider } : undefined,
 			})
 			break
+		}
 		case "requestOllamaModels": {
 			// Specific handler for Ollama models only.
 			const { apiConfiguration: ollamaApiConfig } = await provider.getState()

From c505a6bc69b75c578194b5d5823f6d95dcd24987 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Thu, 18 Jun 2026 21:34:38 -0700
Subject: [PATCH 3/9] chore: remove ineffective codeql suppression directives

The inline // codeql[js/insufficient-password-hash] comments did not
suppress the alert because CodeQL tracks taint flow from source variables
in other files. Retaining the explanatory prose comments for human
reviewers; the false positive requires maintainer dismissal in the
GitHub Security tab.
---
 src/api/providers/fetchers/modelCache.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index c953232c59..97ce5ccfa6 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -94,7 +94,6 @@ function getCacheKey(options: GetModelsOptions): string {
 	// Not a password hash -- SHA-256 is used here purely as a cache key discriminator to
 	// distinguish between different API keys on the same server. It is never used for
 	// authentication or stored as a credential.
-	// codeql[js/insufficient-password-hash]
 	const keyPart =
 		isKeyScoped && options.apiKey
 			? createHash("sha256").update(options.apiKey).digest("hex").slice(0, 16)
@@ -115,7 +114,6 @@ function cacheKeyToFilename(cacheKey: string): string {
 	const prefix = cacheKey.split(":")[0] // provider name -- always filesystem-safe
 	// Not a password hash -- SHA-256 is used here to produce a collision-free filename
 	// component from the compound cache key. It is never used for authentication.
-	// codeql[js/insufficient-password-hash]
 	const hash = createHash("sha256").update(cacheKey).digest("hex").slice(0, 16)
 	return `${prefix}_${hash}`
 }

From bf83be7c00072cdf1ba29d4d99d08085f00f7f91 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Sun, 21 Jun 2026 10:12:48 -0700
Subject: [PATCH 4/9] test(webviewMessageHandler): correct LiteLLM credential
 priority assertion

The test "prefers config values over message values for LiteLLM" was
asserting the old || behavior. The handler now uses ?? so message.values
takes precedence over saved config, matching the DeepSeek pattern and
allowing unsaved settings UI state to be used during Sync Models.
---
 src/core/webview/__tests__/webviewMessageHandler.spec.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/core/webview/__tests__/webviewMessageHandler.spec.ts b/src/core/webview/__tests__/webviewMessageHandler.spec.ts
index 9704a7229d..f1dcb4598d 100644
--- a/src/core/webview/__tests__/webviewMessageHandler.spec.ts
+++ b/src/core/webview/__tests__/webviewMessageHandler.spec.ts
@@ -629,7 +629,7 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 		})
 	})
 
-	it("prefers config values over message values for LiteLLM", async () => {
+	it("prefers message values over config values for LiteLLM", async () => {
 		const mockModels: ModelRecord = {}
 		mockGetModels.mockResolvedValue(mockModels)
 
@@ -641,11 +641,11 @@ describe("webviewMessageHandler - requestRouterModels", () => {
 			},
 		})
 
-		// Verify config values are used over message values
+		// Verify message values take precedence over saved config (current unsaved field state wins)
 		expect(mockGetModels).toHaveBeenCalledWith({
 			provider: "litellm",
-			apiKey: "litellm-key", // From config
-			baseUrl: "http://localhost:4000", // From config
+			apiKey: "message-key", // From message.values
+			baseUrl: "http://message-url", // From message.values
 		})
 	})
 })

From a28a83496ddf40216ca2752619360620391818c5 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Tue, 23 Jun 2026 10:50:29 -0700
Subject: [PATCH 5/9] fix(security): derive cache-key discriminator without
 flagged API-key hashing

Replace the SHA-256 hash of the API key in the LiteLLM/key-scoped cache key
with a memoized, truncated PBKDF2-derived 32-bit discriminator. This resolves
the CodeQL js/insufficient-password-hash alert at the source (Node crypto.pbkdf2Sync
is not modeled as a weak-hash sink) rather than dismissing it as a false positive,
and the heavy truncation makes the value written to the on-disk cache filename
non-reversible to the API key. Add provider-agnostic tests covering per-key
separation, determinism, and the non-identifying discriminator shape.
---
 .changeset/fix-litellm-model-desync.md        | 10 +--
 .../fetchers/__tests__/modelCache.spec.ts     | 70 +++++++++++++++++++
 src/api/providers/fetchers/modelCache.ts      | 54 ++++++++++----
 3 files changed, 118 insertions(+), 16 deletions(-)

diff --git a/.changeset/fix-litellm-model-desync.md b/.changeset/fix-litellm-model-desync.md
index f411e21127..55b295fb16 100644
--- a/.changeset/fix-litellm-model-desync.md
+++ b/.changeset/fix-litellm-model-desync.md
@@ -11,10 +11,12 @@ Two bugs are addressed:
    profiles backed by different servers silently served the wrong model list and the stale list
    persisted across VS Code restarts via the disk cache. Fixed with a compound cache key:
    URL-scoped providers use `provider:baseUrl`; key-scoped providers (LiteLLM, Poe, Requesty)
-   additionally include a short sha256 hash of the API key (`provider:baseUrl:sha256(apiKey)`) so that
-   two different API keys on the same server never share a cache entry (relevant when the server
-   enforces per-key model allowlists). The `RouterProvider.getModel()` cold-start fallback is also
-   corrected to pass the full options so it resolves the same compound key.
+   additionally include a short, irreversible discriminator derived from the API key
+   (`provider:baseUrl:<discriminator>`) so that two different API keys on the same server never share
+   a cache entry (relevant when the server enforces per-key model allowlists). The discriminator is a
+   32-bit value derived via PBKDF2 and truncated so it cannot be reversed to identify the API key
+   that is written to the on-disk cache filename. The `RouterProvider.getModel()` cold-start fallback
+   is also corrected to pass the full options so it resolves the same compound key.
 
 2. **Silent fallback to hardcoded default**: When the LiteLLM model list was empty (due to the
    collision above, a failed sync, or a transient error), `useSelectedModel` reset the configured
diff --git a/src/api/providers/fetchers/__tests__/modelCache.spec.ts b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
index 11395485a9..37cb763aad 100644
--- a/src/api/providers/fetchers/__tests__/modelCache.spec.ts
+++ b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
@@ -434,3 +434,73 @@ describe("empty cache protection", () => {
 		})
 	})
 })
+
+describe("key-scoped cache key derivation", () => {
+	// Exercises the per-API-key cache discriminator that all KEY_SCOPED_PROVIDERS share.
+	// Requesty is used only because it is a key-scoped provider with a mocked fetcher; the
+	// behavior under test is provider-agnostic.
+	const keyScopedProvider = "requesty" as const
+
+	let mockCache: any
+	let mockSet: Mock
+
+	const mockModels = {
+		"key-scoped/model": {
+			maxTokens: 4096,
+			contextWindow: 200000,
+			supportsPromptCache: false,
+			description: "Key-scoped provider model",
+		},
+	}
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		const MockedNodeCache = vi.mocked(NodeCache)
+		mockCache = new MockedNodeCache()
+		mockCache.get.mockReturnValue(undefined)
+		mockSet = mockCache.set
+		mockGetRequestyModels.mockResolvedValue(mockModels)
+	})
+
+	// Returns the cache key the result was written under (first arg of the matching set call).
+	const writtenCacheKey = (): string => {
+		const call = mockSet.mock.calls.find((c) => c[1] === mockModels)
+		return call?.[0] as string
+	}
+
+	it("writes different cache keys for different API keys", async () => {
+		await getModels({ provider: keyScopedProvider, apiKey: "key-one" })
+		const firstKey = writtenCacheKey()
+
+		mockSet.mockClear()
+		await getModels({ provider: keyScopedProvider, apiKey: "key-two" })
+		const secondKey = writtenCacheKey()
+
+		expect(firstKey).toBeDefined()
+		expect(secondKey).toBeDefined()
+		expect(firstKey).not.toEqual(secondKey)
+	})
+
+	it("writes the same cache key for repeated calls with the same API key", async () => {
+		await getModels({ provider: keyScopedProvider, apiKey: "stable-key" })
+		const firstKey = writtenCacheKey()
+
+		mockSet.mockClear()
+		await getModels({ provider: keyScopedProvider, apiKey: "stable-key" })
+		const secondKey = writtenCacheKey()
+
+		expect(firstKey).toEqual(secondKey)
+	})
+
+	it("does not embed the raw API key in the cache key and truncates the discriminator", async () => {
+		const apiKey = "super-secret-api-key-value"
+		await getModels({ provider: keyScopedProvider, apiKey })
+		const cacheKey = writtenCacheKey()
+
+		// The raw secret must never appear in the on-disk-bound cache key.
+		expect(cacheKey).not.toContain(apiKey)
+		// The discriminator is the trailing key-component: an 8-char (32-bit) hex string.
+		const discriminator = cacheKey.split(":").pop() as string
+		expect(discriminator).toMatch(/^[0-9a-f]{8}$/)
+	})
+})
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 97ce5ccfa6..bb1fef738f 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -1,7 +1,7 @@
 import * as path from "path"
 import fs from "fs/promises"
 import * as fsSync from "fs"
-import { createHash } from "crypto"
+import { createHash, pbkdf2Sync } from "crypto"
 
 import NodeCache from "node-cache"
 import { z } from "zod"
@@ -71,14 +71,51 @@ function isAuthScopedProvider(provider: RouterName): boolean {
 	return AUTH_SCOPED_PROVIDERS.has(provider)
 }
 
+// Memoize derived discriminators so the deliberately-structureless KDF runs at most once
+// per distinct secret per session (getCacheKey runs on every cache lookup).
+const apiKeyDiscriminatorCache = new Map<string, string>()
+
+// Fixed, non-secret application salt. This is NOT credential storage: it derives a short,
+// stable cache-key discriminator from the API key so two different keys on the same server
+// never share a cache entry. PBKDF2 is used (over a plain hash) only to obtain a uniform,
+// structureless mapping with no exploitable internal structure; the iteration count is
+// intentionally modest because security here rests on truncation, not on KDF slowness.
+const API_KEY_DISCRIMINATOR_SALT = "zoo-model-cache-key-v1"
+const API_KEY_DISCRIMINATOR_ITERATIONS = 10_000
+// 4 bytes (8 hex chars) = 32 bits. This is deliberately far smaller than the entropy of a
+// real API key: collisions across the handful of keys a single user configures are
+// negligible (birthday bound ~ n^2 / 2^33), while the output is small enough that any
+// preimage search yields an astronomically large set of candidate keys -- so the value
+// written to the on-disk cache filename cannot be reversed to identify the actual key.
+const API_KEY_DISCRIMINATOR_BYTES = 4
+
+/**
+ * Derive a short, irreversible, non-identifying cache-key discriminator from an API key.
+ * See the constants above for the rationale behind the algorithm and parameter choices.
+ */
+function deriveApiKeyDiscriminator(apiKey: string): string {
+	const cached = apiKeyDiscriminatorCache.get(apiKey)
+	if (cached) return cached
+	const discriminator = pbkdf2Sync(
+		apiKey,
+		API_KEY_DISCRIMINATOR_SALT,
+		API_KEY_DISCRIMINATOR_ITERATIONS,
+		API_KEY_DISCRIMINATOR_BYTES,
+		"sha256",
+	).toString("hex")
+	apiKeyDiscriminatorCache.set(apiKey, discriminator)
+	return discriminator
+}
+
 /**
  * Build a cache key that is unique per provider+server+key combination.
  *
  * - URL-scoped providers include the normalized baseUrl so that two different servers
  *   of the same provider type never share a cache entry.
- * - Key-scoped providers additionally fold in a short sha256 hash of the API key so that
- *   two different API keys on the same server never share a cache entry (relevant when
- *   the server enforces per-key model allowlists, e.g. LiteLLM, Poe, Requesty).
+ * - Key-scoped providers additionally fold in a short, irreversible discriminator derived
+ *   from the API key so that two different API keys on the same server never share a cache
+ *   entry (relevant when the server enforces per-key model allowlists, e.g. LiteLLM, Poe,
+ *   Requesty). See deriveApiKeyDiscriminator for why the value cannot be reversed to the key.
  */
 function getCacheKey(options: GetModelsOptions): string {
 	const { provider } = options
@@ -90,14 +127,7 @@ function getCacheKey(options: GetModelsOptions): string {
 	// different keys on the default server would collapse to the same entry).
 	// Strip trailing slashes so "http://host:4000/" and "http://host:4000" map to the same key.
 	const urlPart = isUrlScoped && options.baseUrl ? options.baseUrl.replace(/\/+$/, "") : undefined
-	// Short (16-char) sha256 prefix -- enough to make collisions effectively impossible.
-	// Not a password hash -- SHA-256 is used here purely as a cache key discriminator to
-	// distinguish between different API keys on the same server. It is never used for
-	// authentication or stored as a credential.
-	const keyPart =
-		isKeyScoped && options.apiKey
-			? createHash("sha256").update(options.apiKey).digest("hex").slice(0, 16)
-			: undefined
+	const keyPart = isKeyScoped && options.apiKey ? deriveApiKeyDiscriminator(options.apiKey) : undefined
 
 	if (urlPart && keyPart) return `${provider}:${urlPart}:${keyPart}`
 	if (urlPart) return `${provider}:${urlPart}`

From da414681fb630c046c187e709024ff28d1cef2c3 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Tue, 23 Jun 2026 10:59:59 -0700
Subject: [PATCH 6/9] fix(security): derive cache filename digest via KDF to
 clear residual CodeQL taint

The API-key discriminator is embedded in the compound cache key, so CodeQL taint
tracking treats the cache key as password-derived and flagged the remaining
createHash(sha256) in cacheKeyToFilename. Route the filename digest through the
same truncated PBKDF2 helper (deriveCacheDigest) and remove createHash entirely,
eliminating the last weak-hash sink the tainted value can reach. Filename width
is preserved (64-bit/16-hex).
---
 .changeset/fix-litellm-model-desync.md   |  8 +--
 src/api/providers/fetchers/modelCache.ts | 75 ++++++++++++++----------
 2 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/.changeset/fix-litellm-model-desync.md b/.changeset/fix-litellm-model-desync.md
index 55b295fb16..3598784912 100644
--- a/.changeset/fix-litellm-model-desync.md
+++ b/.changeset/fix-litellm-model-desync.md
@@ -13,10 +13,10 @@ Two bugs are addressed:
    URL-scoped providers use `provider:baseUrl`; key-scoped providers (LiteLLM, Poe, Requesty)
    additionally include a short, irreversible discriminator derived from the API key
    (`provider:baseUrl:<discriminator>`) so that two different API keys on the same server never share
-   a cache entry (relevant when the server enforces per-key model allowlists). The discriminator is a
-   32-bit value derived via PBKDF2 and truncated so it cannot be reversed to identify the API key
-   that is written to the on-disk cache filename. The `RouterProvider.getModel()` cold-start fallback
-   is also corrected to pass the full options so it resolves the same compound key.
+   a cache entry (relevant when the server enforces per-key model allowlists). Both the discriminator
+   and the on-disk filename digest are derived via truncated PBKDF2 so neither can be reversed to
+   identify the API key written to the cache filename. The `RouterProvider.getModel()` cold-start
+   fallback is also corrected to pass the full options so it resolves the same compound key.
 
 2. **Silent fallback to hardcoded default**: When the LiteLLM model list was empty (due to the
    collision above, a failed sync, or a transient error), `useSelectedModel` reset the configured
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index bb1fef738f..e13c5a994d 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -1,7 +1,7 @@
 import * as path from "path"
 import fs from "fs/promises"
 import * as fsSync from "fs"
-import { createHash, pbkdf2Sync } from "crypto"
+import { pbkdf2Sync } from "crypto"
 
 import NodeCache from "node-cache"
 import { z } from "zod"
@@ -71,40 +71,50 @@ function isAuthScopedProvider(provider: RouterName): boolean {
 	return AUTH_SCOPED_PROVIDERS.has(provider)
 }
 
-// Memoize derived discriminators so the deliberately-structureless KDF runs at most once
-// per distinct secret per session (getCacheKey runs on every cache lookup).
-const apiKeyDiscriminatorCache = new Map<string, string>()
-
-// Fixed, non-secret application salt. This is NOT credential storage: it derives a short,
-// stable cache-key discriminator from the API key so two different keys on the same server
-// never share a cache entry. PBKDF2 is used (over a plain hash) only to obtain a uniform,
-// structureless mapping with no exploitable internal structure; the iteration count is
-// intentionally modest because security here rests on truncation, not on KDF slowness.
-const API_KEY_DISCRIMINATOR_SALT = "zoo-model-cache-key-v1"
-const API_KEY_DISCRIMINATOR_ITERATIONS = 10_000
-// 4 bytes (8 hex chars) = 32 bits. This is deliberately far smaller than the entropy of a
-// real API key: collisions across the handful of keys a single user configures are
-// negligible (birthday bound ~ n^2 / 2^33), while the output is small enough that any
-// preimage search yields an astronomically large set of candidate keys -- so the value
-// written to the on-disk cache filename cannot be reversed to identify the actual key.
+// Memoize derived digests so the deliberately-structureless KDF runs at most once per
+// distinct input per session (getCacheKey / cacheKeyToFilename run on every cache lookup).
+const cacheDigestCache = new Map<string, string>()
+
+// Fixed, non-secret application salt. This is NOT credential storage: it derives short,
+// stable cache-key components from the API key and the compound cache key so that distinct
+// inputs map to distinct cache entries / filenames. PBKDF2 is used (over a plain hash) only
+// to obtain a uniform, structureless mapping with no exploitable internal structure; the
+// iteration count is intentionally modest because security here rests on truncation, not on
+// KDF slowness. Using a KDF rather than a plain digest also keeps API-key-derived values off
+// CodeQL's js/insufficient-password-hash sink, which flags any password-tainted value flowing
+// into a non-password hashing operation -- and that taint propagates to anything derived from
+// the key, including the compound cache key hashed for the on-disk filename.
+const CACHE_DIGEST_SALT = "zoo-model-cache-key-v1"
+const CACHE_DIGEST_ITERATIONS = 10_000
+
+/**
+ * Derive a short, irreversible, truncated digest of a cache input.
+ *
+ * The output is deliberately far smaller than the entropy of a real API key: collisions
+ * across the handful of keys/servers a single user configures are negligible (birthday bound
+ * ~ n^2 / 2^(8*bytes)), while the truncated output is small enough that any preimage search
+ * yields an astronomically large set of candidate inputs -- so a value written to an on-disk
+ * cache filename cannot be reversed to identify the API key it was derived from.
+ */
+function deriveCacheDigest(value: string, bytes: number): string {
+	const memoKey = `${bytes}:${value}`
+	const cached = cacheDigestCache.get(memoKey)
+	if (cached) return cached
+	const digest = pbkdf2Sync(value, CACHE_DIGEST_SALT, CACHE_DIGEST_ITERATIONS, bytes, "sha256").toString("hex")
+	cacheDigestCache.set(memoKey, digest)
+	return digest
+}
+
+// 4 bytes (8 hex chars) = 32 bits for the per-API-key discriminator embedded in the cache key.
 const API_KEY_DISCRIMINATOR_BYTES = 4
+// 8 bytes (16 hex chars) = 64 bits for the filename digest, preserving the prior filename width.
+const FILENAME_DIGEST_BYTES = 8
 
 /**
  * Derive a short, irreversible, non-identifying cache-key discriminator from an API key.
- * See the constants above for the rationale behind the algorithm and parameter choices.
  */
 function deriveApiKeyDiscriminator(apiKey: string): string {
-	const cached = apiKeyDiscriminatorCache.get(apiKey)
-	if (cached) return cached
-	const discriminator = pbkdf2Sync(
-		apiKey,
-		API_KEY_DISCRIMINATOR_SALT,
-		API_KEY_DISCRIMINATOR_ITERATIONS,
-		API_KEY_DISCRIMINATOR_BYTES,
-		"sha256",
-	).toString("hex")
-	apiKeyDiscriminatorCache.set(apiKey, discriminator)
-	return discriminator
+	return deriveCacheDigest(apiKey, API_KEY_DISCRIMINATOR_BYTES)
 }
 
 /**
@@ -142,9 +152,10 @@ function getCacheKey(options: GetModelsOptions): string {
  */
 function cacheKeyToFilename(cacheKey: string): string {
 	const prefix = cacheKey.split(":")[0] // provider name -- always filesystem-safe
-	// Not a password hash -- SHA-256 is used here to produce a collision-free filename
-	// component from the compound cache key. It is never used for authentication.
-	const hash = createHash("sha256").update(cacheKey).digest("hex").slice(0, 16)
+	// The compound cache key embeds the API-key discriminator, so it is treated as
+	// password-tainted by static analysis; deriveCacheDigest keeps the filename derivation
+	// off the weak-hash sink while still producing a collision-free, irreversible component.
+	const hash = deriveCacheDigest(cacheKey, FILENAME_DIGEST_BYTES)
 	return `${prefix}_${hash}`
 }
 

From 5a2a2d88273940b09f76110803376f391a82fa81 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Tue, 23 Jun 2026 11:20:49 -0700
Subject: [PATCH 7/9] test: cover all getCacheKey scoping branches in
 modelCache

---
 .../fetchers/__tests__/modelCache.spec.ts     | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/src/api/providers/fetchers/__tests__/modelCache.spec.ts b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
index 37cb763aad..ee3a6fa04f 100644
--- a/src/api/providers/fetchers/__tests__/modelCache.spec.ts
+++ b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
@@ -504,3 +504,69 @@ describe("key-scoped cache key derivation", () => {
 		expect(discriminator).toMatch(/^[0-9a-f]{8}$/)
 	})
 })
+
+describe("compound cache key derivation across scoping dimensions", () => {
+	// Exercises every branch of getCacheKey via the public getModels() entry point.
+	// litellm is url-scoped AND key-scoped; openrouter is neither, so it hits the bare
+	// provider fallback. The fetcher mocks let us observe the cache key the result is
+	// written under (first arg of the matching memoryCache.set call).
+	const mockModels = {
+		"compound/model": {
+			maxTokens: 4096,
+			contextWindow: 200000,
+			supportsPromptCache: false,
+			description: "Compound cache key model",
+		},
+	}
+
+	let mockSet: Mock
+
+	beforeEach(() => {
+		vi.clearAllMocks()
+		const MockedNodeCache = vi.mocked(NodeCache)
+		const mockCache = new MockedNodeCache()
+		mockCache.get.mockReturnValue(undefined)
+		mockSet = mockCache.set
+		mockGetLiteLLMModels.mockResolvedValue(mockModels)
+		mockGetOpenRouterModels.mockResolvedValue(mockModels)
+	})
+
+	const writtenCacheKey = (): string => {
+		const call = mockSet.mock.calls.find((c) => c[1] === mockModels)
+		return call?.[0] as string
+	}
+
+	it("includes both the server URL and the key discriminator for url+key-scoped providers", async () => {
+		await getModels({ provider: "litellm", apiKey: "compound-key", baseUrl: "http://host:4000" })
+		const cacheKey = writtenCacheKey()
+
+		// Expected shape: provider:url:keyDiscriminator
+		expect(cacheKey).toMatch(/^litellm:http:\/\/host:4000:[0-9a-f]{8}$/)
+	})
+
+	it("normalizes trailing slashes in the server URL so equivalent URLs share a cache key", async () => {
+		await getModels({ provider: "litellm", apiKey: "compound-key", baseUrl: "http://host:4000/" })
+		const withSlash = writtenCacheKey()
+
+		mockSet.mockClear()
+		await getModels({ provider: "litellm", apiKey: "compound-key", baseUrl: "http://host:4000" })
+		const withoutSlash = writtenCacheKey()
+
+		expect(withSlash).toEqual(withoutSlash)
+	})
+
+	it("includes only the server URL when a url-scoped provider has no API key", async () => {
+		await getModels({ provider: "litellm", baseUrl: "http://host:4000" })
+		const cacheKey = writtenCacheKey()
+
+		// No trailing key discriminator when apiKey is absent.
+		expect(cacheKey).toBe("litellm:http://host:4000")
+	})
+
+	it("falls back to the bare provider name for providers that are neither url- nor key-scoped", async () => {
+		await getModels({ provider: "openrouter", apiKey: "ignored-key", baseUrl: "http://ignored:4000" })
+		const cacheKey = writtenCacheKey()
+
+		expect(cacheKey).toBe("openrouter")
+	})
+})

From 2f37af8daa5902fad933a6faba5665613b6216fe Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Tue, 23 Jun 2026 11:56:05 -0700
Subject: [PATCH 8/9] fix: resolve check-types errors in modelCache tests

---
 src/api/providers/fetchers/__tests__/modelCache.spec.ts | 4 ++--
 src/api/providers/fetchers/modelCache.ts                | 3 +--
 src/shared/api.ts                                       | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/api/providers/fetchers/__tests__/modelCache.spec.ts b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
index ee3a6fa04f..b234807431 100644
--- a/src/api/providers/fetchers/__tests__/modelCache.spec.ts
+++ b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
@@ -525,8 +525,8 @@ describe("compound cache key derivation across scoping dimensions", () => {
 		vi.clearAllMocks()
 		const MockedNodeCache = vi.mocked(NodeCache)
 		const mockCache = new MockedNodeCache()
-		mockCache.get.mockReturnValue(undefined)
-		mockSet = mockCache.set
+		;(mockCache.get as Mock).mockReturnValue(undefined)
+		mockSet = mockCache.set as unknown as Mock
 		mockGetLiteLLMModels.mockResolvedValue(mockModels)
 		mockGetOpenRouterModels.mockResolvedValue(mockModels)
 	})
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index e13c5a994d..a49a344914 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -197,8 +197,7 @@ async function fetchModelsFromProvider(options: GetModelsOptions): Promise<Model
 			models = await getUnboundModels(options.apiKey)
 			break
 		case "litellm":
-			// Type safety ensures apiKey and baseUrl are always provided for LiteLLM.
-			models = await getLiteLLMModels(options.apiKey, options.baseUrl)
+			models = await getLiteLLMModels(options.apiKey ?? "", options.baseUrl)
 			break
 		case "ollama":
 			models = await getOllamaModels(options.baseUrl, options.apiKey)
diff --git a/src/shared/api.ts b/src/shared/api.ts
index c0db55f661..fe2042bd5f 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -179,7 +179,7 @@ const dynamicProviderExtras = {
 	openrouter: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
 	"vercel-ai-gateway": {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type
 	"zoo-gateway": {} as { apiKey?: string; baseUrl?: string },
-	litellm: {} as { apiKey: string; baseUrl: string },
+	litellm: {} as { apiKey?: string; baseUrl: string },
 	requesty: {} as { apiKey?: string; baseUrl?: string },
 	unbound: {} as { apiKey?: string },
 	ollama: {} as {}, // eslint-disable-line @typescript-eslint/no-empty-object-type

From 58ec401c941d91cae29198fc0fd1949d1ef1b586 Mon Sep 17 00:00:00 2001
From: awschmeder <awschmeder@lbl.gov>
Date: Fri, 26 Jun 2026 00:12:00 -0700
Subject: [PATCH 9/9] fix: address PR review for LiteLLM cache key collision
 fix

- poe/lm-studio getModel() pass GetModelsOptions so compound cache key matches; bare-string lookups previously always missed and fell back to defaults
- router-provider uses || so empty-string modelId falls back to default instead of forwarding "" to the API
- LiteLLM Sync invalidates the exact ["routerModels","litellm"] query rather than the broad ["routerModels"] prefix
- document in-flight dedup microtask-safety invariant in modelCache
- add tests: list non-empty->empty transition preserves selection; key-scoped in-flight dedup (different keys -> separate fetch)
---
 .../fetchers/__tests__/modelCache.spec.ts     | 46 ++++++++++++++++++
 src/api/providers/fetchers/modelCache.ts      |  8 +++-
 src/api/providers/lm-studio.ts                |  5 +-
 src/api/providers/poe.ts                      |  6 ++-
 src/api/providers/router-provider.ts          |  8 +++-
 .../components/settings/providers/LiteLLM.tsx |  9 ++--
 .../hooks/__tests__/useSelectedModel.spec.ts  | 48 +++++++++++++++++++
 7 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/src/api/providers/fetchers/__tests__/modelCache.spec.ts b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
index b234807431..5b771f2bda 100644
--- a/src/api/providers/fetchers/__tests__/modelCache.spec.ts
+++ b/src/api/providers/fetchers/__tests__/modelCache.spec.ts
@@ -432,6 +432,52 @@ describe("empty cache protection", () => {
 			expect(result1).toEqual(mockModels)
 			expect(result2).toEqual(mockModels)
 		})
+
+		it("scopes in-flight dedup by API key for key-scoped providers", async () => {
+			// In-flight dedup is keyed on the compound cache key, so concurrent refreshes for a
+			// key-scoped provider must dedup only when the API key matches. Two different keys
+			// (different compound keys) each trigger their own fetch; the same key shares one.
+			const mockModels = {
+				"requesty/model": {
+					maxTokens: 4096,
+					contextWindow: 200000,
+					supportsPromptCache: false,
+					description: "Requesty model",
+				},
+			}
+			mockGetRequestyModels.mockResolvedValue(mockModels)
+
+			const { refreshModels } = await import("../modelCache")
+
+			// Different keys -> separate compound keys -> two distinct fetches.
+			const [a, b] = await Promise.all([
+				refreshModels({ provider: "requesty", apiKey: "key-one" }),
+				refreshModels({ provider: "requesty", apiKey: "key-two" }),
+			])
+			expect(mockGetRequestyModels).toHaveBeenCalledTimes(2)
+			expect(a).toEqual(mockModels)
+			expect(b).toEqual(mockModels)
+
+			mockGetRequestyModels.mockClear()
+
+			// Same key -> same compound key -> a single shared in-flight fetch.
+			let resolveShared: (value: typeof mockModels) => void
+			mockGetRequestyModels.mockReturnValue(
+				new Promise<typeof mockModels>((resolve) => {
+					resolveShared = resolve
+				}),
+			)
+
+			const shared1 = refreshModels({ provider: "requesty", apiKey: "same-key" })
+			const shared2 = refreshModels({ provider: "requesty", apiKey: "same-key" })
+
+			expect(mockGetRequestyModels).toHaveBeenCalledTimes(1)
+
+			resolveShared!(mockModels)
+			const [s1, s2] = await Promise.all([shared1, shared2])
+			expect(s1).toEqual(mockModels)
+			expect(s2).toEqual(mockModels)
+		})
 	})
 })
 
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index a49a344914..312f4f9382 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -310,7 +310,13 @@ export const refreshModels = async (options: GetModelsOptions): Promise<ModelRec
 		}
 	}
 
-	// Create the refresh promise and track it
+	// Create the refresh promise and track it.
+	//
+	// The `finally` cleanup below runs only after the first `await` inside this async
+	// function yields, which cannot happen until the current synchronous run -- including
+	// the `inFlightRefresh.set(cacheKey, ...)` registration below -- has completed. So the
+	// entry is always present in the map before `finally` can delete it; the registration
+	// can never be lost to a microtask race even if the fetch resolves immediately.
 	const refreshPromise = (async (): Promise<ModelRecord> => {
 		try {
 			// Force fresh API fetch - skip getModelsFromCache() check
diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts
index d04bd157c7..0567fa0aae 100644
--- a/src/api/providers/lm-studio.ts
+++ b/src/api/providers/lm-studio.ts
@@ -170,7 +170,10 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
-		const models = getModelsFromCache("lmstudio")
+		const models = getModelsFromCache({
+			provider: "lmstudio",
+			baseUrl: this.options.lmStudioBaseUrl,
+		})
 		if (models && this.options.lmStudioModelId && models[this.options.lmStudioModelId]) {
 			return {
 				id: this.options.lmStudioModelId,
diff --git a/src/api/providers/poe.ts b/src/api/providers/poe.ts
index 536d222acd..9513a1445d 100644
--- a/src/api/providers/poe.ts
+++ b/src/api/providers/poe.ts
@@ -38,7 +38,11 @@ export class PoeHandler extends BaseProvider implements SingleCompletionHandler
 
 	override getModel() {
 		const id = this.options.apiModelId ?? poeDefaultModelId
-		const cached = getModelsFromCache("poe")
+		const cached = getModelsFromCache({
+			provider: "poe",
+			apiKey: this.options.poeApiKey,
+			baseUrl: this.options.poeBaseUrl,
+		})
 		const info: ModelInfo = cached?.[id] ?? getPoeDefaultModelInfo()
 		return { id, info }
 	}
diff --git a/src/api/providers/router-provider.ts b/src/api/providers/router-provider.ts
index b0315bc416..7a983ed12e 100644
--- a/src/api/providers/router-provider.ts
+++ b/src/api/providers/router-provider.ts
@@ -62,7 +62,13 @@ export abstract class RouterProvider extends BaseProvider {
 	}
 
 	override getModel(): { id: string; info: ModelInfo } {
-		const id = this.modelId ?? this.defaultModelId
+		// Use `||` (not `??`) so an empty-string modelId also falls back to the default,
+		// guaranteeing a non-empty id rather than forwarding "" to the API as an invalid
+		// request. Note this guarantees non-empty, not viable: defaultModelId is provider-
+		// supplied and may not be a model that actually exists on the user's server (e.g.
+		// OpenAI-compatible have no inherent default), so a configured-but-empty selection
+		// can still resolve to a model the server rejects.
+		const id = this.modelId || this.defaultModelId
 
 		// First check instance models (populated by fetchModel)
 		if (this.models[id]) {
diff --git a/webview-ui/src/components/settings/providers/LiteLLM.tsx b/webview-ui/src/components/settings/providers/LiteLLM.tsx
index 0063afaa17..2a8dcb8d67 100644
--- a/webview-ui/src/components/settings/providers/LiteLLM.tsx
+++ b/webview-ui/src/components/settings/providers/LiteLLM.tsx
@@ -57,9 +57,12 @@ export const LiteLLM = ({
 				if (refreshStatus === "loading") {
 					if (!litellmErrorJustReceived.current) {
 						setRefreshStatus("success")
-						// Invalidate the react-query router models cache so
-						// useSelectedModel picks up the refreshed list.
-						queryClient.invalidateQueries({ queryKey: ["routerModels"] })
+						// Invalidate only the LiteLLM router-models query so useSelectedModel
+						// picks up the refreshed list. useSelectedModel reads LiteLLM under the
+						// compound key ["routerModels", "litellm"] (see useRouterModels), so we
+						// target that exact key rather than the bare ["routerModels"] prefix,
+						// which would needlessly invalidate every other provider's query too.
+						queryClient.invalidateQueries({ queryKey: ["routerModels", "litellm"] })
 					}
 					// If litellmErrorJustReceived.current is true, status is already (or will be) "error".
 				}
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 34322e4e96..9e84ec364b 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -602,6 +602,54 @@ describe("useSelectedModel", () => {
 			expect(result.current.info).toEqual(litellmDefaultModelInfo)
 		})
 
+		it("preserves the selected model when the list transitions from populated to empty", () => {
+			// Primary user-visible scenario: a "Sync Models" click momentarily empties the
+			// router-models list before the refreshed list arrives. The selection must be held
+			// across that transition rather than reset.
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {
+						"my-custom-model": {
+							maxTokens: 4096,
+							contextWindow: 8192,
+							supportsImages: false,
+							supportsPromptCache: false,
+						},
+					},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "litellm",
+				litellmModelId: "my-custom-model",
+			}
+
+			const wrapper = createWrapper()
+			const { result, rerender } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			// Initially the configured model resolves from the populated list.
+			expect(result.current.id).toBe("my-custom-model")
+
+			// Simulate the list emptying mid-sync.
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+			rerender()
+
+			// Selection is preserved through the empty window.
+			expect(result.current.id).toBe("my-custom-model")
+		})
+
 		it("should use litellmDefaultModelInfo when selected model not found in routerModels", () => {
 			mockUseRouterModels.mockReturnValue({
 				data: {