diff --git a/.gitignore b/.gitignore
index d90563d..8ae3228 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,9 @@
 # OS files
 .DS_Store
 Thumbs.db
+
+# Build artifacts & local-only scripts
+bin/command-code-proxy-updated
+bin/command-code-proxy-new.exe
+run-proxy.bat
+run-proxy.vbs
diff --git a/bin/command-code-proxy.exe b/bin/command-code-proxy.exe
index 7825553..27bb947 100644
Binary files a/bin/command-code-proxy.exe and b/bin/command-code-proxy.exe differ
diff --git a/internal/api/commandcode.go b/internal/api/commandcode.go
index 6e1f0f8..e04f077 100644
--- a/internal/api/commandcode.go
+++ b/internal/api/commandcode.go
@@ -33,6 +33,7 @@ type CCChatParams struct {
 	MaxTokens   int         `json:"max_tokens"`
 	Temperature float64     `json:"temperature"`
 	Stream      bool        `json:"stream"`
+	Reasoning   string      `json:"reasoning,omitempty"`
 }
 
 type CCConfig struct {
diff --git a/internal/api/openai.go b/internal/api/openai.go
index 8e76198..7e77953 100644
--- a/internal/api/openai.go
+++ b/internal/api/openai.go
@@ -58,8 +58,17 @@ type OpenAIChatRequest struct {
 	PresencePenalty     *float64        `json:"presence_penalty,omitempty"`
 	FrequencyPenalty    *float64        `json:"frequency_penalty,omitempty"`
 	User                string          `json:"user,omitempty"`
+	Reasoning           *ReasoningEffort `json:"reasoning,omitempty"`
 }
 
+type ReasoningEffort string
+
+const (
+	ReasoningEffortLow    ReasoningEffort = "low"
+	ReasoningEffortMedium ReasoningEffort = "medium"
+	ReasoningEffortHigh   ReasoningEffort = "high"
+)
+
 type OpenAIResponsesRequest struct {
 	Model               string   `json:"model"`
 	Input               any      `json:"input"`
@@ -131,10 +140,26 @@ type OpenAIError struct {
 }
 
 type OpenAIModel struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Created int64  `json:"created"`
-	OwnedBy string `json:"owned_by"`
+	ID            string `json:"id"`
+	Object        string `json:"object"`
+	Created       int64  `json:"created"`
+	OwnedBy       string `json:"owned_by"`
+	ContextLength int    `json:"context_length,omitempty"`
+	Pricing       *ModelPricing `json:"pricing,omitempty"`
+	// Descriptive metadata from Command Code's docs.
+	// Populated dynamically — display_name is "Claude Sonnet 4.6" etc,
+	// description is the one-line "best for" summary.
+	DisplayName  string `json:"display_name,omitempty"`
+	Description  string `json:"description,omitempty"`
+	Capabilities string `json:"capabilities,omitempty"`
+}
+
+// ModelPricing describes deal/pricing information for a model.
+// Only populated when an active deal exists.
+type ModelPricing struct {
+	Multiplier  string `json:"multiplier,omitempty"`   // e.g. "4x", "2x", "99% off"
+	Description string `json:"description,omitempty"`  // human-readable description
+	Status      string `json:"status,omitempty"`       // "permanent" or expiration date
 }
 
 type OpenAIModelList struct {
diff --git a/internal/proxy/contextmap.go b/internal/proxy/contextmap.go
new file mode 100644
index 0000000..e8c2598
--- /dev/null
+++ b/internal/proxy/contextmap.go
@@ -0,0 +1,112 @@
+package proxy
+
+// contextLengthMap maps model IDs to their context window size in tokens.
+// Command Code's /v1/models endpoint does not expose context windows, so we
+// maintain a local override. Unknown models fall back to 128K (safe default).
+//
+// Sources:
+//   - Upstream provider documentation (Anthropic, OpenAI, Google, etc.)
+//   - Command Code CLI docs (https://commandcode.ai/docs/reference/cli/models)
+//   - Model release notes
+var contextLengthMap = map[string]int{
+	// MoonshotAI Kimi
+	"moonshotai/Kimi-K2.7-Code":           262144,
+	"moonshotai/Kimi-K2.7-Code-Highspeed": 262144,
+	"moonshotai/Kimi-K2.6":                262144,
+	"moonshotai/Kimi-K2.5":                262144,
+
+	// ZhipuAI GLM
+	"zai-org/GLM-5.2": 1048576, // 1M
+	"zai-org/GLM-5.1": 202752,
+	"zai-org/GLM-5":   202752,
+
+	// MiniMax
+	"MiniMaxAI/MiniMax-M3":        1000000, // 1M
+	"MiniMaxAI/MiniMax-M3-Promo":  1000000, // 1M
+	"MiniMaxAI/MiniMax-M2.7":      204800,
+	"MiniMaxAI/MiniMax-M2.5":      204800,
+
+	// DeepSeek
+	"deepseek/deepseek-v4-pro":   1000000, // 1M
+	"deepseek/deepseek-v4-flash": 1000000, // 1M
+
+	// Qwen
+	"Qwen/Qwen3.6-Max-Preview": 1048576, // 1M
+	"Qwen/Qwen3.6-Plus":        1048576, // 1M
+	"Qwen/Qwen3.7-Max":         1048576, // 1M
+	"Qwen/Qwen3.7-Plus":        1048576, // 1M
+
+	// StepFun
+	"stepfun/Step-3.7-Flash": 262144,
+	"stepfun/Step-3.5-Flash": 262144,
+
+	// Xiaomi MiMo
+	"xiaomi/mimo-v2.5-pro": 1048576, // 1M
+	"xiaomi/mimo-v2.5":     1048576, // 1M
+
+	// NVIDIA Nemotron
+	"nvidia/nemotron-3-ultra-550b-a55b": 131072,
+
+	// Anthropic Claude
+	"claude-sonnet-4-6":              1000000, // 1M
+	"claude-fable-5":                 1000000, // 1M
+	"claude-opus-4-8":                1000000, // 1M
+	"claude-opus-4-7":                1000000, // 1M
+	"claude-opus-4-6":                200000,
+	"claude-haiku-4-5":               200000,
+	"claude-haiku-4-5-20251001":      200000,
+
+	// OpenAI GPT
+	"gpt-5.5":         1050000, // ~1M with reasoning overhead
+	"gpt-5.4":         1050000,
+	"gpt-5.3-codex":   400000,
+	"gpt-5.4-mini":    400000,
+
+	// Google Gemini
+	"google/gemini-3.5-flash":      1048576, // 1M
+	"google/gemini-3.1-flash-lite": 1048576, // 1M
+}
+
+// defaultContextLength is returned for unknown models when we can't determine
+// their context window. Conservative — better to under-estimate and trigger
+// compression early than to over-estimate and blow the window.
+const defaultContextLength = 131072 // 128K
+
+// tasteOneModelID is Command Code's internal model that ships free with all plans.
+// Not exposed via upstream /v1/models API; hardcoded here so it's discoverable
+// through the proxy.
+const tasteOneModelID = "taste-1"
+
+// tasteOneContextLength is the context window for taste-1. Set to a reasonable
+// coding-agent window — tune if Command Code publishes a different number.
+const tasteOneContextLength = 262144 // 256K
+
+// ContextLengthFor reports the context window (in tokens) for modelID: an exact
+// table hit, else a match on the final path segment, else defaultContextLength.
+func ContextLengthFor(modelID string) int {
+	window, exact := contextLengthMap[modelID]
+	if exact {
+		return window
+	}
+	for candidate, size := range contextLengthMap {
+		if modelMatches(modelID, candidate) {
+			return size
+		}
+	}
+	return defaultContextLength
+}
+
+// modelMatches reports whether query and known share the same final path
+// segment, i.e. the text after the last "/" (the whole string if there is none).
+// The comparison is case-sensitive: "minimax/MiniMax-M3" matches "MiniMaxAI/MiniMax-M3".
+func modelMatches(query, known string) bool {
+	tail := func(id string) string {
+		// cut ends just past the last '/', or at 0 when there is none.
+		cut := len(id)
+		for cut > 0 && id[cut-1] != '/' {
+			cut--
+		}
+		return id[cut:]
+	}
+	return tail(query) == tail(known)
+}
diff --git a/internal/proxy/model.go b/internal/proxy/model.go
index 773d247..8621a0e 100644
--- a/internal/proxy/model.go
+++ b/internal/proxy/model.go
@@ -13,6 +13,12 @@ func MapModel(name string) string {
 		return "MiniMaxAI/MiniMax-M2.7"
 	case "minimax-m2.5", "minimax2.5", "minimax":
 		return "MiniMaxAI/MiniMax-M2.5"
+	case "minimax-m3", "minimax3":
+		return "MiniMaxAI/MiniMax-M3"
+	case "minimax-m3-promo", "minimax3-promo":
+		return "MiniMaxAI/MiniMax-M3-Promo"
+	case "glm-5.2", "glm-52":
+		return "zai-org/GLM-5.2"
 	case "glm-5.1":
 		return "zai-org/GLM-5.1"
 	case "glm-5":
@@ -21,14 +27,52 @@ func MapModel(name string) string {
 		return "moonshotai/Kimi-K2.6"
 	case "kimi-k2.5", "kimi2.5":
 		return "moonshotai/Kimi-K2.5"
+	case "kimi-k2.7-code", "kimi2.7-code":
+		return "moonshotai/Kimi-K2.7-Code"
+	case "kimi-k2.7-code-highspeed", "kimi2.7-code-highspeed":
+		return "moonshotai/Kimi-K2.7-Code-Highspeed"
 	case "qwen-3.6-max-preview", "qwen3.6-max":
 		return "Qwen/Qwen3.6-Max-Preview"
 	case "qwen-3.6-plus", "qwen3.6-plus", "qwen3.6":
 		return "Qwen/Qwen3.6-Plus"
+	case "qwen-3.7-max", "qwen3.7-max":
+		return "Qwen/Qwen3.7-Max"
+	case "qwen-3.7-plus", "qwen3.7-plus", "qwen3.7":
+		return "Qwen/Qwen3.7-Plus"
 	case "step-3.5-flash", "step3.5":
 		return "stepfun/Step-3.5-Flash"
+	case "step-3.7-flash", "step3.7":
+		return "stepfun/Step-3.7-Flash"
+	case "mimo-v2.5-pro", "mimo2.5-pro":
+		return "xiaomi/mimo-v2.5-pro"
+	case "mimo-v2.5", "mimo2.5", "mimo":
+		return "xiaomi/mimo-v2.5"
+	case "nemotron-3-ultra", "nemotron":
+		return "nvidia/nemotron-3-ultra-550b-a55b"
+	case "claude-sonnet-4-6", "sonnet-4-6", "sonnet":
+		return "claude-sonnet-4-6"
+	case "claude-fable-5", "fable-5", "fable":
+		return "claude-fable-5"
+	case "claude-opus-4-8", "opus-4-8", "opus":
+		return "claude-opus-4-8"
+	case "claude-opus-4-7", "opus-4-7":
+		return "claude-opus-4-7"
+	case "claude-opus-4-6", "opus-4-6":
+		return "claude-opus-4-6"
+	case "claude-haiku-4-5", "haiku-4-5", "haiku":
+		return "claude-haiku-4-5"
+	case "gpt-5.5":
+		return "gpt-5.5"
+	case "gpt-5.4":
+		return "gpt-5.4"
+	case "gpt-5.3-codex", "codex":
+		return "gpt-5.3-codex"
+	case "gpt-5.4-mini", "gpt-mini":
+		return "gpt-5.4-mini"
 	case "gemini-3.1-flash-lite", "gemini-flash-lite":
 		return "google/gemini-3.1-flash-lite"
+	case "gemini-3.5-flash", "gemini-flash":
+		return "google/gemini-3.5-flash"
 	default:
 		return name // pass through as-is
 	}
diff --git a/internal/proxy/modelfetch.go b/internal/proxy/modelfetch.go
new file mode 100644
index 0000000..1d5e718
--- /dev/null
+++ b/internal/proxy/modelfetch.go
@@ -0,0 +1,311 @@
+package proxy
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/dev2k6/command-code-proxy-server/internal/api"
+	"github.com/dev2k6/command-code-proxy-server/internal/version"
+)
+
+const (
+	// upstreamModelsURL is Command Code's Provider API models endpoint
+	upstreamModelsURL = "https://api.commandcode.ai/provider/v1/models"
+
+	// upstreamProbeURL is the chat endpoint we use to probe whether a model is
+	// actually reachable. We send a minimal request — if it returns non-403
+	// (e.g., 200 with content, or 400 for bad request), the model exists.
+	upstreamProbeURL = "https://api.commandcode.ai/alpha/generate"
+
+	// modelCacheTTL is how often we refresh the model list from upstream
+	modelCacheTTL = 6 * time.Hour
+
+	// modelFetchTimeout is the per-request timeout for upstream model fetching
+	modelFetchTimeout = 10 * time.Second
+
+	// modelProbeConcurrency caps how many models we probe in parallel.
+	// Upstream gets cranky if we hammer with 30 concurrent requests.
+	modelProbeConcurrency = 4
+)
+
+// UpstreamModel represents a single model in the upstream Command Code API
+// response (OpenAI-compatible /v1/models format).
+type UpstreamModel struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
+
+// UpstreamModelList is the response wrapper for /v1/models
+type UpstreamModelList struct {
+	Object string          `json:"object"`
+	Data   []UpstreamModel `json:"data"`
+}
+
+// ModelCache holds the dynamically-fetched model list with thread-safe access.
+type ModelCache struct {
+	mu          sync.RWMutex
+	models      []api.OpenAIModel
+	fetchedAt   time.Time
+	httpClient  *http.Client
+	fetchingNow bool // prevents concurrent refreshes
+	pricing     *PricingCache
+	modelInfo   *ModelInfoCache
+}
+
+// NewModelCache creates an empty cache. Call Refresh() to populate it.
+func NewModelCache(pricing *PricingCache, info *ModelInfoCache) *ModelCache {
+	return &ModelCache{
+		models:    nil,
+		pricing:   pricing,
+		modelInfo: info,
+		httpClient: &http.Client{
+			Timeout: modelFetchTimeout,
+		},
+	}
+}
+
+// Get returns a copy of the current cached models. If cache is empty,
+// returns the static fallback list from getStaticModels().
+func (c *ModelCache) Get() []api.OpenAIModel {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	if len(c.models) == 0 {
+		// Return static fallback enriched with model info (if available)
+		static := getStaticModels()
+		if c.modelInfo != nil {
+			for i := range static {
+				c.modelInfo.AttachInfo(&static[i])
+			}
+		}
+		return static
+	}
+	// Return a copy so callers can't mutate cache state
+	out := make([]api.OpenAIModel, len(c.models))
+	copy(out, c.models)
+	return out
+}
+
+// IsStale returns true if the cache needs refreshing.
+func (c *ModelCache) IsStale() bool {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	return time.Since(c.fetchedAt) > modelCacheTTL || len(c.models) == 0
+}
+
+// Refresh fetches and probes the upstream model list and swaps it into the cache.
+// It returns nil immediately if another refresh is in flight. On error, or when no
+// model survives probing, the existing cache is kept and the error is returned.
+func (c *ModelCache) Refresh(apiKey string) error {
+	c.mu.Lock()
+	if c.fetchingNow {
+		c.mu.Unlock()
+		return nil // another goroutine is already refreshing
+	}
+	c.fetchingNow = true
+	c.mu.Unlock()
+	defer func() {
+		c.mu.Lock()
+		c.fetchingNow = false
+		c.mu.Unlock()
+	}()
+	models, err := c.fetchAndValidate(apiKey)
+	if err != nil {
+		return err
+	}
+	if len(models) == 0 {
+		// All probes failing (e.g. a network blip) must not wipe a good cache.
+		return fmt.Errorf("upstream listed no reachable models")
+	}
+	c.mu.Lock()
+	c.models = models
+	c.fetchedAt = time.Now()
+	c.mu.Unlock()
+	log.Printf("[models] refreshed cache: %d models (validated against upstream)", len(models))
+	return nil
+}
+
+// fetchAndValidate fetches the upstream model list, then probes each model
+// to verify it's reachable. Models that 403 upstream are filtered out.
+func (c *ModelCache) fetchAndValidate(apiKey string) ([]api.OpenAIModel, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, upstreamModelsURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("build request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetch upstream: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("upstream status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var upstream UpstreamModelList
+	if err := json.NewDecoder(resp.Body).Decode(&upstream); err != nil {
+		return nil, fmt.Errorf("decode upstream: %w", err)
+	}
+
+	// Probe each model in parallel to filter out non-existent ones.
+	probeResults := c.probeModels(apiKey, upstream.Data)
+
+	// Build the validated model list.
+	enriched := make([]api.OpenAIModel, 0, len(probeResults))
+	for _, pr := range probeResults {
+		if !pr.reachable {
+			log.Printf("[models] filtered out unreachable model: %s (probe status: %d)", pr.model.ID, pr.status)
+			continue
+		}
+		m := api.OpenAIModel{
+			ID:            pr.model.ID,
+			Object:        pr.model.Object,
+			Created:       pr.model.Created,
+			OwnedBy:       pr.model.OwnedBy,
+			ContextLength: ContextLengthFor(pr.model.ID),
+		}
+		// Attach pricing/deal info if available
+		if c.pricing != nil {
+			if deal, ok := c.pricing.GetDeal(pr.model.ID); ok {
+				m.Pricing = &api.ModelPricing{
+					Multiplier:  deal.Multiplier,
+					Description: deal.Description,
+					Status:      deal.Status,
+				}
+			}
+		}
+		// Attach display name + description if available
+		if c.modelInfo != nil {
+			c.modelInfo.AttachInfo(&m)
+		}
+		enriched = append(enriched, m)
+	}
+	return enriched, nil
+}
+
+// probeResult holds the result of probing a single model.
+type probeResult struct {
+	model     UpstreamModel
+	reachable bool
+	status    int
+}
+
+// probeModels probes each model in parallel to verify upstream reachability.
+// Returns results in input order.
+func (c *ModelCache) probeModels(apiKey string, models []UpstreamModel) []probeResult {
+	results := make([]probeResult, len(models))
+	sem := make(chan struct{}, modelProbeConcurrency)
+	var wg sync.WaitGroup
+
+	for i, m := range models {
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(idx int, model UpstreamModel) {
+			defer wg.Done()
+			defer func() { <-sem }()
+			reachable, status := c.probeModel(apiKey, model.ID)
+			results[idx] = probeResult{model: model, reachable: reachable, status: status}
+		}(i, m)
+	}
+	wg.Wait()
+	return results
+}
+
+// probeModel sends a minimal chat request to verify a model is reachable.
+// Returns (true, status) for any non-403 response and (false, 403) when upstream
+// rejects the model; a request that cannot be built or sent yields (false, 0).
+func (c *ModelCache) probeModel(apiKey, modelID string) (bool, int) {
+	ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout)
+	defer cancel()
+
+	// Build a minimal probe request — empty messages, max 1 token.
+	// Upstream will return either:
+	//   - 403 MODEL_NOT_IN_PLAN or "model not recognized" — model doesn't exist
+	//   - 200 with empty content — model exists but request was minimal
+	//   - 400 bad request — model exists, request was malformed
+	probeBody := map[string]any{
+		"config": map[string]any{
+			"workingDir":    ".",
+			"date":          time.Now().Format("2006-01-02"),
+			"environment":   "cli",
+			"structure":     []string{},
+			"isGitRepo":     false,
+			"currentBranch": "",
+			"mainBranch":    "main",
+			"gitStatus":     "",
+			"recentCommits": []string{},
+		},
+		"memory": "",
+		"taste":  "",
+		"skills": "",
+		"params": map[string]any{
+			"model":       modelID,
+			"messages":    []map[string]any{},
+			"tools":       []any{},
+			"system":      "",
+			"max_tokens":  1,
+			"temperature": 0.0,
+			"stream":      false,
+		},
+		"threadId": "probe",
+	}
+	bodyJSON, _ := json.Marshal(probeBody)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, upstreamProbeURL, bytes.NewReader(bodyJSON))
+	if err != nil {
+		return false, 0
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("x-command-code-version", version.GetCommandCodeVersion())
+	req.Header.Set("x-cli-environment", "production")
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return false, 0
+	}
+	defer resp.Body.Close()
+	io.Copy(io.Discard, resp.Body)
+
+	// 403 means model not recognized — filter out.
+	// Anything else (200, 400, 429, 500) means model exists at this ID.
+	return resp.StatusCode != http.StatusForbidden, resp.StatusCode
+}
+
+// StartBackgroundRefresh launches a goroutine that periodically refreshes
+// the cache. Safe to call once at startup.
+func (c *ModelCache) StartBackgroundRefresh(apiKey string) {
+	go func() {
+		// Initial fetch
+		if err := c.Refresh(apiKey); err != nil {
+			log.Printf("[models] initial refresh failed (using static fallback): %v", err)
+		}
+
+		ticker := time.NewTicker(modelCacheTTL)
+		defer ticker.Stop()
+		for range ticker.C {
+			if err := c.Refresh(apiKey); err != nil {
+				log.Printf("[models] refresh failed (keeping existing cache): %v", err)
+			}
+		}
+	}()
+}
diff --git a/internal/proxy/modelinfo.go b/internal/proxy/modelinfo.go
new file mode 100644
index 0000000..d148728
--- /dev/null
+++ b/internal/proxy/modelinfo.go
@@ -0,0 +1,231 @@
+package proxy
+
+import (
+	"context"
+	"io"
+	"log"
+	"net/http"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/dev2k6/command-code-proxy-server/internal/api"
+)
+
+const (
+	// modelsDocURL is the public Command Code docs page that lists every model
+	// with its display name and a one-line "best for" description.
+	modelsDocURL = "https://commandcode.ai/docs/reference/cli/models"
+
+	// modelInfoFetchInterval — refresh model info every 24 hours. Descriptions
+	// change infrequently; daily refresh is plenty.
+	modelInfoFetchInterval = 24 * time.Hour
+
+	// modelInfoFetchTimeout — per-request timeout.
+	modelInfoFetchTimeout = 15 * time.Second
+)
+
+// ModelInfo describes a single model: its display name, best-for description,
+// and capabilities (e.g., "text, vision").
+type ModelInfo struct {
+	ID           string `json:"id"`            // matches upstream model ID (lowercase for lookup)
+	DisplayName  string `json:"display_name"`  // human-friendly name like "Claude Sonnet 4.6"
+	Description  string `json:"description"`   // one-line "best for" description
+	Capabilities string `json:"capabilities"`  // comma-separated: "text", "text, vision"
+}
+
+// ModelInfoCache holds scraped model metadata with thread-safe access.
+type ModelInfoCache struct {
+	mu        sync.RWMutex
+	infos     map[string]ModelInfo // key: model ID (lowercase)
+	fetchedAt time.Time
+	client    *http.Client
+}
+
+// NewModelInfoCache creates an empty cache.
+func NewModelInfoCache() *ModelInfoCache {
+	return &ModelInfoCache{
+		infos:  make(map[string]ModelInfo),
+		client: &http.Client{Timeout: modelInfoFetchTimeout},
+	}
+}
+
+// Get returns the info for modelID (and true), or a zero ModelInfo and false.
+// Exact lowercase ID match wins; else final path segments are compared normalized.
+func (c *ModelInfoCache) Get(modelID string) (ModelInfo, bool) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	lower := strings.ToLower(modelID)
+	if info, ok := c.infos[lower]; ok {
+		return info, true
+	}
+	// Normalize the query once; LastIndex is -1 without a "/", so +1 keeps it all.
+	want := normalizeModelKey(lower[strings.LastIndex(lower, "/")+1:])
+	bestKey, found := "", false
+	for k := range c.infos {
+		if normalizeModelKey(k[strings.LastIndex(k, "/")+1:]) != want {
+			continue
+		}
+		// Map order is random: keep the smallest key so ties resolve stably.
+		if !found || k < bestKey {
+			bestKey, found = k, true
+		}
+	}
+	if !found {
+		return ModelInfo{}, false
+	}
+	return c.infos[bestKey], true
+}
+
+// normalizeModelKey lowercases s and removes every hyphen, dot, and underscore,
+// so IDs that differ only in separators or letter case compare equal.
+func normalizeModelKey(s string) string {
+	// Pure deletions, so one Replacer pass equals the chained ReplaceAll calls.
+	separators := strings.NewReplacer("-", "", ".", "", "_", "")
+	return strings.ToLower(separators.Replace(s))
+}
+
+// Refresh scrapes the docs page and replaces the cached model info. It returns
+// an error, leaving the cache untouched, on a request failure or non-200 status.
+// A page that parses to zero models is a scrape failure: old entries are kept.
+func (c *ModelInfoCache) Refresh() error {
+	ctx, cancel := context.WithTimeout(context.Background(), modelInfoFetchTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, modelsDocURL, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+	resp, err := c.client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return &ModelInfoFetchError{Status: resp.StatusCode}
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+	infos := scrapeModelInfo(string(body))
+	if len(infos) == 0 {
+		// scrapeModelInfo already logged a warning; keep serving the old data.
+		return nil
+	}
+	c.mu.Lock()
+	c.infos = infos
+	c.fetchedAt = time.Now()
+	c.mu.Unlock()
+	log.Printf("[modelinfo] refreshed: %d models", len(infos))
+	return nil
+}
+
+// ModelInfoFetchError returned on non-200 from upstream docs page.
+type ModelInfoFetchError struct {
+	Status int
+}
+
+func (e *ModelInfoFetchError) Error() string {
+	return "model info fetch failed: status " + http.StatusText(e.Status)
+}
+
+// StartBackgroundRefresh launches a goroutine that periodically refreshes info.
+func (c *ModelInfoCache) StartBackgroundRefresh() {
+	go func() {
+		if err := c.Refresh(); err != nil {
+			log.Printf("[modelinfo] initial refresh failed (descriptions will be empty): %v", err)
+		}
+		ticker := time.NewTicker(modelInfoFetchInterval)
+		defer ticker.Stop()
+		for range ticker.C {
+			if err := c.Refresh(); err != nil {
+				log.Printf("[modelinfo] refresh failed (keeping existing): %v", err)
+			}
+		}
+	}()
+}
+
+// scrapeModelInfo parses the docs HTML and extracts (id, name, description, capabilities)
+// for each model. Returns a map keyed by lowercase model ID.
+func scrapeModelInfo(html string) map[string]ModelInfo {
+	infos := make(map[string]ModelInfo)
+
+	// Pattern matches rows in the docs models table:
+	// <code>model-id</code> ... <td>Name</td> <td>best-for description</td> <td>capabilities</td>
+	rowPattern := regexp.MustCompile(
+		`<code[^>]*>([^<]+)</code>.*?<td[^>]*>\s*([A-Z][^<]+?)\s*</td>\s*<td[^>]*>([^<]+?)</td>\s*<td[^>]*>([^<]+?)</td>`,
+	)
+	matches := rowPattern.FindAllStringSubmatch(html, -1)
+	for _, m := range matches {
+		if len(m) < 5 {
+			continue
+		}
+		id := strings.TrimSpace(m[1])
+		name := strings.TrimSpace(m[2])
+		desc := strings.TrimSpace(m[3])
+		caps := strings.TrimSpace(m[4])
+
+		// Skip placeholder/empty
+		if strings.HasPrefix(id, "-") || id == "" {
+			continue
+		}
+		if strings.Contains(desc, "&amp;") {
+			desc = strings.ReplaceAll(desc, "&amp;", "&")
+		}
+
+		infos[strings.ToLower(id)] = ModelInfo{
+			ID:           id,
+			DisplayName:  name,
+			Description:  desc,
+			Capabilities: caps,
+		}
+	}
+
+	if len(infos) == 0 {
+		log.Printf("[modelinfo] WARNING: no models parsed from docs page — HTML may have changed")
+	}
+	return infos
+}
+
+// AttachInfo enriches an OpenAIModel with display name and description
+// if available in the cache. Mutates the model in-place.
+func (c *ModelInfoCache) AttachInfo(model *api.OpenAIModel) {
+	if c == nil || model == nil {
+		return
+	}
+	info, ok := c.Get(model.ID)
+	if !ok {
+		// Try hardcoded fallbacks for models not in the docs table
+		if fallback, exists := hardcodedInfo[strings.ToLower(model.ID)]; exists {
+			model.DisplayName = fallback.DisplayName
+			model.Description = fallback.Description
+			model.Capabilities = fallback.Capabilities
+		}
+		return
+	}
+	model.DisplayName = info.DisplayName
+	model.Description = info.Description
+	if info.Capabilities != "" {
+		model.Capabilities = info.Capabilities
+	}
+}
+
+// hardcodedInfo provides descriptions for models that exist upstream but aren't
+// listed in the public docs page. These are kept minimal and conservative.
+var hardcodedInfo = map[string]ModelInfo{
+	"claude-haiku-4-5-20251001": {
+		ID:           "claude-haiku-4-5-20251001",
+		DisplayName:  "Claude Haiku 4.5",
+		Description:  "fast, compact model for high-throughput tasks",
+		Capabilities: "text, vision",
+	},
+	"zai-org/glm-5.2": {
+		ID:           "zai-org/GLM-5.2",
+		DisplayName:  "GLM 5.2",
+		Description:  "extended-context autonomous coding agent",
+		Capabilities: "text",
+	},
+}
diff --git a/internal/proxy/pricing.go b/internal/proxy/pricing.go
new file mode 100644
index 0000000..48481ec
--- /dev/null
+++ b/internal/proxy/pricing.go
@@ -0,0 +1,305 @@
+package proxy
+
+import (
+	"context"
+	"io"
+	"log"
+	"net/http"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+)
+
+const (
+	// pricingURL is the public pricing page. We scrape deals from it.
+	pricingURL = "https://commandcode.ai/docs/resources/pricing-limits"
+
+	// pricingFetchInterval is how often we re-scrape pricing/deals.
+	pricingFetchInterval = 24 * time.Hour
+
+	// pricingFetchTimeout is the per-request timeout.
+	pricingFetchTimeout = 15 * time.Second
+)
+
+// Deal represents a single pricing deal/discount on a model.
+type Deal struct {
+	Model       string `json:"model"`        // model identifier (matches upstream ID)
+	Multiplier  string `json:"multiplier"`   // e.g. "4x", "2x", "99% off"
+	Description string `json:"description"`  // human-readable description
+	Status      string `json:"status"`       // "permanent" or expiration date
+}
+
+// PricingCache holds scraped deal data with thread-safe access.
+type PricingCache struct {
+	mu        sync.RWMutex
+	deals     map[string]Deal // key: model identifier (lowercase)
+	fetchedAt time.Time
+	client    *http.Client
+}
+
+// NewPricingCache creates an empty pricing cache.
+func NewPricingCache() *PricingCache {
+	return &PricingCache{
+		deals:  make(map[string]Deal),
+		client: &http.Client{Timeout: pricingFetchTimeout},
+	}
+}
+
+// GetDeal returns the deal for a model and true, or a zero Deal and false.
+// Matching is case-insensitive. The best-ranked key wins regardless of map
+// iteration order; ties go to the longer key, then the lexically smaller one:
+//   - exact ID, or last path component equal to the key
+//   - equal after swapping dots/dashes or dropping dashes
+//     (e.g. "Qwen/Qwen3.7-Max" matches "qwen-3.7-max")
+//   - substring either way
+//     (e.g. "nvidia/nemotron-3-ultra-550b-a55b" matches "nemotron-3-ultra")
+func (c *PricingCache) GetDeal(modelID string) (Deal, bool) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+	lowerID := strings.ToLower(modelID)
+	// Try exact match first
+	if d, ok := c.deals[lowerID]; ok {
+		return d, true
+	}
+
+	// Everything below compares against the last path component only.
+	lastPath := lowerID[strings.LastIndex(lowerID, "/")+1:]
+	if lastPath == "" {
+		return Deal{}, false // "" is a substring of every key; never match it
+	}
+	alt1 := strings.ReplaceAll(lastPath, ".", "-")
+	alt2 := strings.ReplaceAll(lastPath, "-", ".")
+	stripLast := strings.ReplaceAll(lastPath, "-", "")
+
+	bestKey, bestRank := "", 0
+	for k := range c.deals {
+		rank := 0
+		switch {
+		case k == "":
+			// An empty key would be "contained" in every model ID; ignore it.
+		case lastPath == k:
+			rank = 3
+		case alt1 == k, alt2 == k, stripLast == strings.ReplaceAll(k, "-", ""):
+			rank = 2
+		case strings.Contains(lastPath, k), strings.Contains(k, lastPath),
+			strings.Contains(alt1, k), strings.Contains(alt2, k):
+			rank = 1
+		}
+		if rank == 0 || rank < bestRank {
+			continue
+		}
+		if rank > bestRank || len(k) > len(bestKey) || (len(k) == len(bestKey) && k < bestKey) {
+			bestKey, bestRank = k, rank
+		}
+	}
+	if bestRank == 0 {
+		return Deal{}, false
+	}
+	return c.deals[bestKey], true
+}
+
+// Refresh scrapes the pricing page and rebuilds the deal cache.
+// On error, returns it and keeps the existing cache; the caller logs it.
+func (c *PricingCache) Refresh() error {
+	ctx, cancel := context.WithTimeout(context.Background(), pricingFetchTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pricingURL, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+	resp, err := c.client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return &PricingFetchError{Status: resp.StatusCode, Body: strings.TrimSpace(string(body))}
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+
+	deals := scrapeDeals(string(body))
+
+	c.mu.Lock()
+	c.deals = deals
+	c.fetchedAt = time.Now()
+	c.mu.Unlock()
+
+	log.Printf("[pricing] refreshed: %d deals", len(deals))
+	return nil
+}
+
+// StartBackgroundRefresh launches a goroutine that periodically refreshes pricing.
+func (c *PricingCache) StartBackgroundRefresh() {
+	go func() {
+		if err := c.Refresh(); err != nil {
+			log.Printf("[pricing] initial refresh failed (deals will be empty): %v", err)
+		}
+		ticker := time.NewTicker(pricingFetchInterval)
+		defer ticker.Stop()
+		for range ticker.C {
+			if err := c.Refresh(); err != nil {
+				log.Printf("[pricing] refresh failed (keeping existing deals): %v", err)
+			}
+		}
+	}()
+}
+
+// PricingFetchError is returned when the upstream pricing page returns a non-200.
+type PricingFetchError struct {
+	Status int
+	Body   string
+}
+
+func (e *PricingFetchError) Error() string {
+	return "pricing fetch failed: status " + http.StatusText(e.Status)
+}
+
+// scrapeDeals parses the HTML for DEAL blocks and extracts model + description.
+// This is intentionally lenient — Command Code may change the HTML structure,
+// and we'd rather get partial data than no data.
+//
+// Robustness strategy:
+//   - Blocks longer than 2000 chars or without a <code> tag are discarded
+//   - Every <code> model in a block receives that block's deal
+//   - Returns empty map on any parse failure (never errors)
+//   - Logs warnings for unexpected structures so we notice breakage
+func scrapeDeals(html string) map[string]Deal {
+	deals := make(map[string]Deal)
+
+	// Strategy: parse the HTML block-by-block.
+	// Each deal block is between DEAL</span> and the next </a></div> or end of deals section.
+	// We extract the inner HTML of each <a>...</a> deal element.
+	dealBlockRe := regexp.MustCompile(
+		`(?s)DEAL</span>(.*?)</a></div>`,
+	)
+
+	dealBlocks := dealBlockRe.FindAllStringSubmatch(html, -1)
+	log.Printf("[pricing] found %d deal blocks in HTML", len(dealBlocks))
+	if len(dealBlocks) == 0 {
+		log.Printf("[pricing] WARNING: no deal blocks found — HTML structure may have changed")
+		return deals
+	}
+
+	// Filter out bogus matches (e.g., footer/nav text accidentally matched).
+	// Real deal blocks are short (< 2000 chars) and contain at least one <code> tag.
+	var realBlocks [][]string
+	for _, b := range dealBlocks {
+		if len(b[1]) > 2000 {
+			continue // too long — likely a nav menu or footer
+		}
+		if strings.Contains(b[1], "<code") {
+			realBlocks = append(realBlocks, b)
+		}
+	}
+	log.Printf("[pricing] filtered to %d real deal blocks (with <code>, len<2000)", len(realBlocks))
+	dealBlocks = realBlocks
+	if len(dealBlocks) == 0 {
+		return deals
+	}
+
+	codeRe := regexp.MustCompile(`<code[^>]*>([^<]+)</code>`)
+	statusRe := regexp.MustCompile(`(permanent|through\s+\w+\s+\d+,\s+\d{4})`)
+
+	for _, block := range dealBlocks {
+		if len(block) < 2 {
+			continue
+		}
+		inner := block[1]
+
+		// Find all model codes in this deal
+		codes := codeRe.FindAllStringSubmatch(inner, -1)
+		if len(codes) == 0 {
+			continue
+		}
+
+		// Find status
+		statusMatch := statusRe.FindStringSubmatch(inner)
+		status := "unknown"
+		if len(statusMatch) >= 2 {
+			status = statusMatch[1]
+		}
+
+		// Description: text after the LAST </code> in the deal, before status marker
+		lastCodeEnd := strings.LastIndex(inner, "</code>")
+		if lastCodeEnd < 0 {
+			continue
+		}
+		descStart := lastCodeEnd + len("</code>")
+
+		// Find status in remaining text
+		descText := inner[descStart:]
+		statusIdx := strings.Index(descText, status)
+		if statusIdx < 0 {
+			// Status might be in nested span; take up to next </span>
+			spanEnd := strings.Index(descText, "</span>")
+			if spanEnd < 0 {
+				spanEnd = len(descText)
+			}
+			statusIdx = spanEnd
+		}
+		desc := descText[:statusIdx]
+		desc = stripHTMLTags(desc)
+		desc = strings.TrimSpace(desc)
+
+		mult := extractMultiplier(desc)
+
+		// Apply to all models in this deal
+		for _, c := range codes {
+			if len(c) < 2 {
+				continue
+			}
+			model := strings.TrimSpace(c[1])
+			deal := Deal{
+				Model:       model,
+				Multiplier:  mult,
+				Description: desc,
+				Status:      status,
+			}
+			deals[strings.ToLower(model)] = deal
+		}
+	}
+
+	if len(deals) == 0 {
+		log.Printf("[pricing] WARNING: no deals parsed from %d blocks — extraction failed", len(dealBlocks))
+	}
+
+	return deals
+}
+
+// stripTagRe and stripSpaceRe are compiled once at package init, not per call.
+var stripTagRe, stripSpaceRe = regexp.MustCompile(`<[^>]+>`), regexp.MustCompile(`\s+`)
+
+// stripHTMLTags removes HTML tags from s and collapses each whitespace run
+// (including newlines) into a single space. Best-effort: it is a regex, not
+// an HTML parser, and it does not decode entities such as &amp;.
+func stripHTMLTags(s string) string {
+	return stripSpaceRe.ReplaceAllString(stripTagRe.ReplaceAllString(s, ""), " ")
+}
+
+// multiplierRe and percentOffRe are compiled once instead of on every call.
+var (
+	multiplierRe = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*[x×]`)
+	percentOffRe = regexp.MustCompile(`(\d+)%\s*off`)
+)
+
+// extractMultiplier returns "<n>x" for a number followed by x or × (normalized
+// to x), else "<n>% off" for a percentage discount, else "" if neither appears.
+func extractMultiplier(desc string) string {
+	if m := multiplierRe.FindStringSubmatch(desc); m != nil {
+		return m[1] + "x"
+	}
+	if m := percentOffRe.FindStringSubmatch(desc); m != nil {
+		return m[1] + "% off"
+	}
+	return ""
+}
diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go
index cecd881..a7ebc5f 100644
--- a/internal/proxy/proxy.go
+++ b/internal/proxy/proxy.go
@@ -60,19 +60,34 @@ func normalizeFinishReason(reason string) string {
 
 // Proxy struct
 type Proxy struct {
-	APIKey  string
-	BaseURL string
-	Client  *http.Client
-	Debug   bool
+	APIKey    string
+	BaseURL   string
+	Client    *http.Client
+	Debug     bool
+	Models    *ModelCache     // source of the list served by HandleModels
+	Pricing   *PricingCache   // also handed to NewModelCache in NewProxy
+	ModelInfo *ModelInfoCache // also handed to NewModelCache in NewProxy
 }
 
 // NewProxy creates a new proxy instance
 func NewProxy(apiKey string) *Proxy {
-	return &Proxy{
-		APIKey:  apiKey,
-		BaseURL: defaultBaseURL,
-		Client:  &http.Client{Timeout: defaultTimeout},
+	pricing := NewPricingCache()
+	modelInfo := NewModelInfoCache()
+	p := &Proxy{
+		APIKey:    apiKey,
+		BaseURL:   defaultBaseURL,
+		Client:    &http.Client{Timeout: defaultTimeout},
+		Models:    NewModelCache(pricing, modelInfo),
+		Pricing:   pricing,
+		ModelInfo: modelInfo,
+	}
+	// Start each cache's background refresh (presumably non-blocking — confirm); the model list is refreshed only when an API key was supplied.
+	pricing.StartBackgroundRefresh()
+	modelInfo.StartBackgroundRefresh()
+	if apiKey != "" {
+		p.Models.StartBackgroundRefresh(apiKey)
 	}
+	return p
 }
 
 // BuildRequest builds the CommandCode request body
@@ -95,6 +110,12 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody
 
 	tools := ConvertTools(openAIReq.Tools)
 
+	// Map reasoning effort
+	var reasoning string
+	if openAIReq.Reasoning != nil {
+		reasoning = string(*openAIReq.Reasoning)
+	}
+
 	ccBody := api.CCRequestBody{
 		Config: api.CCConfig{
 			WorkingDir:    ".",
@@ -118,6 +139,7 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody
 			MaxTokens:   maxTokens,
 			Temperature: temperature,
 			Stream:      true,
+			Reasoning:   reasoning,
 		},
 		ThreadID: uuid.New().String(),
 	}
@@ -668,30 +690,79 @@ func responseItemsToMessages(items []any) []api.OpenAIMessage {
 
 // HandleModels handles the /v1/models endpoint
 func (p *Proxy) HandleModels(w http.ResponseWriter, r *http.Request) {
+	// Serve the list held by the model cache; Get is expected to return the static fallback when the cache is empty (Get is not visible here — confirm).
+	cached := p.Models.Get()
+
+	// When the cache reports itself stale, refresh it in a goroutine so this response is not delayed and a later request sees fresh data.
+	// NOTE(review): fetchingNow is read here without any visible lock, and a refresh is attempted even when p.APIKey is empty — confirm both are intended.
+	if p.Models.IsStale() && !p.Models.fetchingNow {
+		go func() {
+			if err := p.Models.Refresh(p.APIKey); err != nil {
+				log.Printf("[models] background refresh failed: %v", err)
+			}
+		}()
+	}
+
 	models := api.OpenAIModelList{
 		Object: "list",
-		Data: []api.OpenAIModel{
-			// MoonshotAI
-			{ID: "moonshotai/Kimi-K2.6", Object: "model", Created: 0, OwnedBy: "moonshotai"},
-			{ID: "moonshotai/Kimi-K2.5", Object: "model", Created: 0, OwnedBy: "moonshotai"},
-			// ZhipuAI
-			{ID: "zai-org/GLM-5.1", Object: "model", Created: 0, OwnedBy: "zhipuai"},
-			{ID: "zai-org/GLM-5", Object: "model", Created: 0, OwnedBy: "zhipuai"},
-			// MiniMaxAI
-			{ID: "MiniMaxAI/MiniMax-M2.7", Object: "model", Created: 0, OwnedBy: "minimaxai"},
-			{ID: "MiniMaxAI/MiniMax-M2.5", Object: "model", Created: 0, OwnedBy: "minimaxai"},
-			// DeepSeek
-			{ID: "deepseek/deepseek-v4-pro", Object: "model", Created: 0, OwnedBy: "deepseek"},
-			{ID: "deepseek/deepseek-v4-flash", Object: "model", Created: 0, OwnedBy: "deepseek"},
-			// Qwen
-			{ID: "Qwen/Qwen3.6-Max-Preview", Object: "model", Created: 0, OwnedBy: "qwen"},
-			{ID: "Qwen/Qwen3.6-Plus", Object: "model", Created: 0, OwnedBy: "qwen"},
-			// StepFun
-			{ID: "stepfun/Step-3.5-Flash", Object: "model", Created: 0, OwnedBy: "stepfun"},
-			// Google
-			{ID: "google/gemini-3.1-flash-lite", Object: "model", Created: 0, OwnedBy: "google"},
-		},
+		Data:   cached,
 	}
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(models)
 }
+
+// getStaticModels builds the hardcoded fallback model list, meant for when the
+// dynamic cache is empty and nothing has been fetched from upstream yet. Owner and
+// context length are derived per ID via inferOwner and ContextLengthFor.
+func getStaticModels() []api.OpenAIModel {
+	fallbackIDs := []string{
+		// MoonshotAI
+		"moonshotai/Kimi-K2.7-Code", "moonshotai/Kimi-K2.7-Code-Highspeed",
+		// ZhipuAI
+		"zai-org/GLM-5.2", "zai-org/GLM-5.1", "zai-org/GLM-5",
+		// MiniMaxAI
+		"MiniMaxAI/MiniMax-M3",
+		"MiniMaxAI/MiniMax-M2.7", "MiniMaxAI/MiniMax-M2.5",
+		// DeepSeek
+		"deepseek/deepseek-v4-pro", "deepseek/deepseek-v4-flash",
+		// Qwen
+		"Qwen/Qwen3.6-Max-Preview", "Qwen/Qwen3.6-Plus",
+		"Qwen/Qwen3.7-Max", "Qwen/Qwen3.7-Plus",
+		// StepFun
+		"stepfun/Step-3.7-Flash", "stepfun/Step-3.5-Flash",
+		// Xiaomi
+		"xiaomi/mimo-v2.5-pro", "xiaomi/mimo-v2.5",
+		// NVIDIA
+		"nvidia/nemotron-3-ultra-550b-a55b",
+		// Anthropic (IDs carry no "provider/" prefix)
+		"claude-sonnet-4-6", "claude-fable-5",
+		"claude-opus-4-8", "claude-opus-4-7", "claude-opus-4-6",
+		"claude-haiku-4-5", "claude-haiku-4-5-20251001",
+		// OpenAI (IDs carry no "provider/" prefix)
+		"gpt-5.5", "gpt-5.4", "gpt-5.3-codex", "gpt-5.4-mini",
+		// Google
+		"google/gemini-3.5-flash", "google/gemini-3.1-flash-lite",
+	}
+
+	// One entry per ID, in list order; Created stays at its zero value.
+	models := make([]api.OpenAIModel, len(fallbackIDs))
+	for i, id := range fallbackIDs {
+		models[i] = api.OpenAIModel{
+			ID:            id,
+			Object:        "model",
+			OwnedBy:       inferOwner(id),
+			ContextLength: ContextLengthFor(id),
+		}
+	}
+	return models
+}
+
+// inferOwner returns the text before the first '/' in id (e.g. "google" for "google/gemini-3.5-flash"), or "unknown" when id has no '/'.
+func inferOwner(id string) string {
+	for pos := 0; pos < len(id); pos++ {
+		if id[pos] == '/' {
+			return id[:pos]
+		}
+	}
+	return "unknown"
+}