diff --git a/.gitignore b/.gitignore
index d90563d..8ae3228 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,9 @@
# OS files
.DS_Store
Thumbs.db
+
+# Build artifacts & local-only scripts
+bin/command-code-proxy-updated
+bin/command-code-proxy-new.exe
+run-proxy.bat
+run-proxy.vbs
diff --git a/bin/command-code-proxy.exe b/bin/command-code-proxy.exe
index 7825553..27bb947 100644
Binary files a/bin/command-code-proxy.exe and b/bin/command-code-proxy.exe differ
diff --git a/internal/api/commandcode.go b/internal/api/commandcode.go
index 6e1f0f8..e04f077 100644
--- a/internal/api/commandcode.go
+++ b/internal/api/commandcode.go
@@ -33,6 +33,7 @@ type CCChatParams struct {
MaxTokens int `json:"max_tokens"`
Temperature float64 `json:"temperature"`
Stream bool `json:"stream"`
+ Reasoning string `json:"reasoning,omitempty"`
}
type CCConfig struct {
diff --git a/internal/api/openai.go b/internal/api/openai.go
index 8e76198..7e77953 100644
--- a/internal/api/openai.go
+++ b/internal/api/openai.go
@@ -58,8 +58,17 @@ type OpenAIChatRequest struct {
PresencePenalty *float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
User string `json:"user,omitempty"`
+ Reasoning *ReasoningEffort `json:"reasoning,omitempty"`
}
+// ReasoningEffort is the string value carried by the optional "reasoning"
+// field of OpenAIChatRequest.
+type ReasoningEffort string
+
+// Named reasoning-effort values declared by this package.
+const (
+ ReasoningEffortLow ReasoningEffort = "low"
+ ReasoningEffortMedium ReasoningEffort = "medium"
+ ReasoningEffortHigh ReasoningEffort = "high"
+)
+
type OpenAIResponsesRequest struct {
Model string `json:"model"`
Input any `json:"input"`
@@ -131,10 +140,26 @@ type OpenAIError struct {
}
type OpenAIModel struct {
- ID string `json:"id"`
- Object string `json:"object"`
- Created int64 `json:"created"`
- OwnedBy string `json:"owned_by"`
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Created int64 `json:"created"`
+ OwnedBy string `json:"owned_by"`
+ ContextLength int `json:"context_length,omitempty"`
+ Pricing *ModelPricing `json:"pricing,omitempty"`
+ // Descriptive metadata from Command Code's docs.
+ // Populated dynamically — display_name is "Claude Sonnet 4.6" etc,
+ // description is the one-line "best for" summary.
+ DisplayName string `json:"display_name,omitempty"`
+ Description string `json:"description,omitempty"`
+ Capabilities string `json:"capabilities,omitempty"`
+}
+
+// ModelPricing describes deal/pricing information for a model.
+// Only populated when an active deal exists.
+type ModelPricing struct {
+ Multiplier string `json:"multiplier,omitempty"` // e.g. "4x", "2x", "99% off"; empty when no multiplier was found in the deal text
+ Description string `json:"description,omitempty"` // human-readable description
+ Status string `json:"status,omitempty"` // "permanent", a "through <date>" phrase, or "unknown" when the deal text carries neither
+}
type OpenAIModelList struct {
diff --git a/internal/proxy/contextmap.go b/internal/proxy/contextmap.go
new file mode 100644
index 0000000..e8c2598
--- /dev/null
+++ b/internal/proxy/contextmap.go
@@ -0,0 +1,112 @@
+package proxy
+
+// contextLengthMap maps model IDs to their context window size in tokens.
+// Command Code's /v1/models endpoint does not expose context windows, so we
+// maintain a local override. Unknown models fall back to 128K (safe default).
+//
+// Sources:
+// - Upstream provider documentation (Anthropic, OpenAI, Google, etc.)
+// - Command Code CLI docs (https://commandcode.ai/docs/reference/cli/models)
+// - Model release notes
+var contextLengthMap = map[string]int{
+ // MoonshotAI Kimi
+ "moonshotai/Kimi-K2.7-Code": 262144,
+ "moonshotai/Kimi-K2.7-Code-Highspeed": 262144,
+ "moonshotai/Kimi-K2.6": 262144,
+ "moonshotai/Kimi-K2.5": 262144,
+
+ // ZhipuAI GLM
+ "zai-org/GLM-5.2": 1048576, // 1M
+ "zai-org/GLM-5.1": 202752,
+ "zai-org/GLM-5": 202752,
+
+ // MiniMax
+ "MiniMaxAI/MiniMax-M3": 1000000, // 1M
+ "MiniMaxAI/MiniMax-M3-Promo": 1000000, // 1M
+ "MiniMaxAI/MiniMax-M2.7": 204800,
+ "MiniMaxAI/MiniMax-M2.5": 204800,
+
+ // DeepSeek
+ "deepseek/deepseek-v4-pro": 1000000, // 1M
+ "deepseek/deepseek-v4-flash": 1000000, // 1M
+
+ // Qwen
+ "Qwen/Qwen3.6-Max-Preview": 1048576, // 1M
+ "Qwen/Qwen3.6-Plus": 1048576, // 1M
+ "Qwen/Qwen3.7-Max": 1048576, // 1M
+ "Qwen/Qwen3.7-Plus": 1048576, // 1M
+
+ // StepFun
+ "stepfun/Step-3.7-Flash": 262144,
+ "stepfun/Step-3.5-Flash": 262144,
+
+ // Xiaomi MiMo
+ "xiaomi/mimo-v2.5-pro": 1048576, // 1M
+ "xiaomi/mimo-v2.5": 1048576, // 1M
+
+ // NVIDIA Nemotron
+ "nvidia/nemotron-3-ultra-550b-a55b": 131072,
+
+ // Anthropic Claude
+ "claude-sonnet-4-6": 1000000, // 1M
+ "claude-fable-5": 1000000, // 1M
+ "claude-opus-4-8": 1000000, // 1M
+ "claude-opus-4-7": 1000000, // 1M
+ "claude-opus-4-6": 200000,
+ "claude-haiku-4-5": 200000,
+ "claude-haiku-4-5-20251001": 200000,
+
+ // OpenAI GPT
+ "gpt-5.5": 1050000, // ~1M with reasoning overhead
+ "gpt-5.4": 1050000,
+ "gpt-5.3-codex": 400000,
+ "gpt-5.4-mini": 400000,
+
+ // Google Gemini
+ "google/gemini-3.5-flash": 1048576, // 1M
+ "google/gemini-3.1-flash-lite": 1048576, // 1M
+}
+
+// defaultContextLength is returned for unknown models when we can't determine
+// their context window. Conservative — better to under-estimate and trigger
+// compression early than to over-estimate and blow the window.
+const defaultContextLength = 131072 // 128K
+
+// tasteOneModelID is Command Code's internal model that ships free with all plans.
+// Not exposed via upstream /v1/models API; hardcoded here so it's discoverable
+// through the proxy.
+const tasteOneModelID = "taste-1"
+
+// tasteOneContextLength is the context window for taste-1. Set to a reasonable
+// coding-agent window — tune if Command Code publishes a different number.
+const tasteOneContextLength = 262144 // 256K
+
+// ContextLengthFor reports the context window, in tokens, for modelID.
+// An exact entry in contextLengthMap wins. Otherwise the table is scanned for
+// an entry whose final "/"-separated segment equals that of modelID, and
+// defaultContextLength is returned when nothing matches.
+func ContextLengthFor(modelID string) int {
+	size, found := contextLengthMap[modelID]
+	if !found {
+		size = defaultContextLength
+		// No exact entry: compare on the bare model name so IDs carrying a
+		// provider prefix we don't track still resolve.
+		for candidate, window := range contextLengthMap {
+			if modelMatches(modelID, candidate) {
+				size = window
+				break
+			}
+		}
+	}
+	return size
+}
+
+// modelMatches reports whether query and known name the same model once any
+// provider prefix is ignored: only the text after the last "/" of each ID is
+// compared, and the comparison is case-sensitive.
+// e.g. "minimax/MiniMax-M3" matches "MiniMaxAI/MiniMax-M3"
+func modelMatches(query, known string) bool {
+	// bareName returns the portion of id following its final '/', or id
+	// itself when it contains no '/'.
+	bareName := func(id string) string {
+		cut := len(id)
+		for cut > 0 && id[cut-1] != '/' {
+			cut--
+		}
+		return id[cut:]
+	}
+	return bareName(known) == bareName(query)
+}
diff --git a/internal/proxy/model.go b/internal/proxy/model.go
index 773d247..8621a0e 100644
--- a/internal/proxy/model.go
+++ b/internal/proxy/model.go
@@ -13,6 +13,12 @@ func MapModel(name string) string {
return "MiniMaxAI/MiniMax-M2.7"
case "minimax-m2.5", "minimax2.5", "minimax":
return "MiniMaxAI/MiniMax-M2.5"
+ case "minimax-m3", "minimax3":
+ return "MiniMaxAI/MiniMax-M3"
+ case "minimax-m3-promo", "minimax3-promo":
+ return "MiniMaxAI/MiniMax-M3-Promo"
+ case "glm-5.2", "glm-52":
+ return "zai-org/GLM-5.2"
case "glm-5.1":
return "zai-org/GLM-5.1"
case "glm-5":
@@ -21,14 +27,52 @@ func MapModel(name string) string {
return "moonshotai/Kimi-K2.6"
case "kimi-k2.5", "kimi2.5":
return "moonshotai/Kimi-K2.5"
+ case "kimi-k2.7-code", "kimi2.7-code":
+ return "moonshotai/Kimi-K2.7-Code"
+ case "kimi-k2.7-code-highspeed", "kimi2.7-code-highspeed":
+ return "moonshotai/Kimi-K2.7-Code-Highspeed"
case "qwen-3.6-max-preview", "qwen3.6-max":
return "Qwen/Qwen3.6-Max-Preview"
case "qwen-3.6-plus", "qwen3.6-plus", "qwen3.6":
return "Qwen/Qwen3.6-Plus"
+ case "qwen-3.7-max", "qwen3.7-max":
+ return "Qwen/Qwen3.7-Max"
+ case "qwen-3.7-plus", "qwen3.7-plus", "qwen3.7":
+ return "Qwen/Qwen3.7-Plus"
case "step-3.5-flash", "step3.5":
return "stepfun/Step-3.5-Flash"
+ case "step-3.7-flash", "step3.7":
+ return "stepfun/Step-3.7-Flash"
+ case "mimo-v2.5-pro", "mimo2.5-pro":
+ return "xiaomi/mimo-v2.5-pro"
+ case "mimo-v2.5", "mimo2.5", "mimo":
+ return "xiaomi/mimo-v2.5"
+ case "nemotron-3-ultra", "nemotron":
+ return "nvidia/nemotron-3-ultra-550b-a55b"
+ case "claude-sonnet-4-6", "sonnet-4-6", "sonnet":
+ return "claude-sonnet-4-6"
+ case "claude-fable-5", "fable-5", "fable":
+ return "claude-fable-5"
+ case "claude-opus-4-8", "opus-4-8", "opus":
+ return "claude-opus-4-8"
+ case "claude-opus-4-7", "opus-4-7":
+ return "claude-opus-4-7"
+ case "claude-opus-4-6", "opus-4-6":
+ return "claude-opus-4-6"
+ case "claude-haiku-4-5", "haiku-4-5", "haiku":
+ return "claude-haiku-4-5"
+ case "gpt-5.5":
+ return "gpt-5.5"
+ case "gpt-5.4":
+ return "gpt-5.4"
+ case "gpt-5.3-codex", "codex":
+ return "gpt-5.3-codex"
+ case "gpt-5.4-mini", "gpt-mini":
+ return "gpt-5.4-mini"
case "gemini-3.1-flash-lite", "gemini-flash-lite":
return "google/gemini-3.1-flash-lite"
+ case "gemini-3.5-flash", "gemini-flash":
+ return "google/gemini-3.5-flash"
default:
return name // pass through as-is
}
diff --git a/internal/proxy/modelfetch.go b/internal/proxy/modelfetch.go
new file mode 100644
index 0000000..1d5e718
--- /dev/null
+++ b/internal/proxy/modelfetch.go
@@ -0,0 +1,311 @@
+package proxy
+
+import (
+ "bytes"
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "log"
+ "net/http"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/dev2k6/command-code-proxy-server/internal/api"
+ "github.com/dev2k6/command-code-proxy-server/internal/version"
+)
+
+const (
+ // upstreamModelsURL is Command Code's Provider API models endpoint
+ upstreamModelsURL = "https://api.commandcode.ai/provider/v1/models"
+
+ // upstreamProbeURL is the chat endpoint we use to probe whether a model is
+ // actually reachable. We send a minimal request — if it returns non-403
+ // (e.g., 200 with content, or 400 for bad request), the model exists.
+ upstreamProbeURL = "https://api.commandcode.ai/alpha/generate"
+
+ // modelCacheTTL is how often we refresh the model list from upstream
+ modelCacheTTL = 6 * time.Hour
+
+ // modelFetchTimeout is the per-request timeout for upstream model fetching
+ modelFetchTimeout = 10 * time.Second
+
+ // modelProbeConcurrency caps how many models we probe in parallel.
+ // Upstream gets cranky if we hammer with 30 concurrent requests.
+ modelProbeConcurrency = 4
+)
+
+// UpstreamModel represents a single model in the upstream Command Code API
+// response (OpenAI-compatible /v1/models format).
+type UpstreamModel struct {
+ ID string `json:"id"`
+ Object string `json:"object"`
+ Created int64 `json:"created"`
+ OwnedBy string `json:"owned_by"`
+}
+
+// UpstreamModelList is the response wrapper for /v1/models
+type UpstreamModelList struct {
+ Object string `json:"object"`
+ Data []UpstreamModel `json:"data"`
+}
+
+// ModelCache holds the dynamically-fetched model list with thread-safe access.
+// Get, IsStale and Refresh take mu around their accesses to models, fetchedAt
+// and fetchingNow.
+type ModelCache struct {
+ mu sync.RWMutex
+ models []api.OpenAIModel // last validated upstream list; empty until a Refresh succeeds
+ fetchedAt time.Time // set by Refresh each time models is replaced
+ httpClient *http.Client // used for the model-list request and for every probe
+ fetchingNow bool // prevents concurrent refreshes
+ pricing *PricingCache // optional source of deal info; nil disables pricing enrichment
+ modelInfo *ModelInfoCache // optional source of display metadata; nil disables that enrichment
+}
+
+// NewModelCache builds a ModelCache that holds no models yet; call Refresh to
+// populate it. pricing and info are the optional enrichment sources consulted
+// when models are built, and the HTTP client it creates uses modelFetchTimeout.
+func NewModelCache(pricing *PricingCache, info *ModelInfoCache) *ModelCache {
+	cache := new(ModelCache)
+	cache.httpClient = &http.Client{Timeout: modelFetchTimeout}
+	cache.pricing = pricing
+	cache.modelInfo = info
+	return cache
+}
+
+// Get returns the models to advertise. When the cache holds data the result
+// is a freshly allocated slice, so callers can reorder or overwrite elements
+// without touching the cache (the copy is per-element, so a Pricing pointer
+// still refers to the cached value). When the cache is empty the result is
+// the static list from getStaticModels, enriched with model info if a
+// ModelInfoCache was supplied.
+func (c *ModelCache) Get() []api.OpenAIModel {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if cached := len(c.models); cached > 0 {
+		snapshot := make([]api.OpenAIModel, cached)
+		copy(snapshot, c.models)
+		return snapshot
+	}
+
+	// Nothing fetched yet: serve the static list, decorated with whatever
+	// model info is on hand.
+	fallback := getStaticModels()
+	if c.modelInfo == nil {
+		return fallback
+	}
+	for idx := range fallback {
+		c.modelInfo.AttachInfo(&fallback[idx])
+	}
+	return fallback
+}
+
+// IsStale reports whether the cache should be refreshed: either it holds no
+// models, or the last successful fetch is older than modelCacheTTL.
+func (c *ModelCache) IsStale() bool {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if len(c.models) == 0 {
+		return true
+	}
+	return time.Since(c.fetchedAt) > modelCacheTTL
+}
+
+// Refresh rebuilds the cache from upstream via fetchAndValidate, which drops
+// models that fail the reachability probe.
+//
+// Only one refresh runs at a time: a call made while another is in flight
+// returns nil immediately without fetching. When the fetch fails the error is
+// returned and the previously cached models are left untouched.
+func (c *ModelCache) Refresh(apiKey string) error {
+	// Claim the refresh slot. Writing true over an already-true flag is a
+	// no-op, so the claim and the check can share one critical section.
+	c.mu.Lock()
+	alreadyRunning := c.fetchingNow
+	c.fetchingNow = true
+	c.mu.Unlock()
+	if alreadyRunning {
+		return nil // another goroutine is already refreshing
+	}
+
+	// Release the slot however this call ends.
+	defer func() {
+		c.mu.Lock()
+		c.fetchingNow = false
+		c.mu.Unlock()
+	}()
+
+	fresh, err := c.fetchAndValidate(apiKey)
+	if err == nil {
+		c.mu.Lock()
+		c.models, c.fetchedAt = fresh, time.Now()
+		c.mu.Unlock()
+		log.Printf("[models] refreshed cache: %d models (validated against upstream)", len(fresh))
+	}
+	return err
+}
+
+// fetchAndValidate downloads the upstream model list, probes every listed
+// model, and converts the ones that pass the probe into api.OpenAIModel values.
+//
+// apiKey is sent as the Bearer token on the list request and is handed to
+// probeModels for the per-model probes. The modelFetchTimeout context created
+// here covers only the list request.
+//
+// Each surviving model gets its context length from ContextLengthFor, deal
+// pricing from c.pricing when a deal exists for its ID, and display metadata
+// from c.modelInfo; either enrichment source may be nil.
+//
+// An error is returned when the request cannot be built or sent, when upstream
+// answers with a non-200 status, or when the response body cannot be decoded.
+func (c *ModelCache) fetchAndValidate(apiKey string) ([]api.OpenAIModel, error) {
+	// maxErrorBody caps how much of a non-200 response body is copied into the
+	// returned error, so an oversized error page cannot bloat memory or logs.
+	const maxErrorBody = 4 << 10
+
+	ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, upstreamModelsURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("build request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("fetch upstream: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: a failed read just leaves the message without a body.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
+		return nil, fmt.Errorf("upstream status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var upstream UpstreamModelList
+	if err := json.NewDecoder(resp.Body).Decode(&upstream); err != nil {
+		return nil, fmt.Errorf("decode upstream: %w", err)
+	}
+
+	// Probe each model in parallel to filter out non-existent ones.
+	probeResults := c.probeModels(apiKey, upstream.Data)
+
+	// Build the validated model list.
+	enriched := make([]api.OpenAIModel, 0, len(probeResults))
+	for _, pr := range probeResults {
+		if !pr.reachable {
+			log.Printf("[models] filtered out unreachable model: %s (probe status: %d)", pr.model.ID, pr.status)
+			continue
+		}
+		m := api.OpenAIModel{
+			ID:            pr.model.ID,
+			Object:        pr.model.Object,
+			Created:       pr.model.Created,
+			OwnedBy:       pr.model.OwnedBy,
+			ContextLength: ContextLengthFor(pr.model.ID),
+		}
+		// Attach pricing/deal info if available
+		if c.pricing != nil {
+			if deal, ok := c.pricing.GetDeal(pr.model.ID); ok {
+				m.Pricing = &api.ModelPricing{
+					Multiplier:  deal.Multiplier,
+					Description: deal.Description,
+					Status:      deal.Status,
+				}
+			}
+		}
+		// Attach display name + description if available
+		if c.modelInfo != nil {
+			c.modelInfo.AttachInfo(&m)
+		}
+		enriched = append(enriched, m)
+	}
+	return enriched, nil
+}
+
+// probeResult holds the result of probing a single model.
+type probeResult struct {
+ model UpstreamModel // the upstream entry that was probed
+ reachable bool // false when the probe got HTTP 403 or produced no response at all
+ status int // HTTP status of the probe response; 0 when no response was received
+}
+
+// probeModels checks every entry of models with probeModel, keeping at most
+// modelProbeConcurrency probes in flight. It blocks until all probes have
+// finished and returns one result per input, in input order.
+func (c *ModelCache) probeModels(apiKey string, models []UpstreamModel) []probeResult {
+	var (
+		pending sync.WaitGroup
+		slots   = make(chan struct{}, modelProbeConcurrency)
+		out     = make([]probeResult, len(models))
+	)
+
+	pending.Add(len(models))
+	for pos := range models {
+		slots <- struct{}{} // blocks while modelProbeConcurrency probes are running
+		go func(dst *probeResult, candidate UpstreamModel) {
+			defer func() {
+				<-slots
+				pending.Done()
+			}()
+			ok, code := c.probeModel(apiKey, candidate.ID)
+			*dst = probeResult{model: candidate, reachable: ok, status: code}
+		}(&out[pos], models[pos])
+	}
+	pending.Wait()
+	return out
+}
+
+// probeModel sends a minimal chat request to verify a model is reachable.
+// Returns (true, status) if the model exists (any non-403 response).
+// Returns (false, status) if upstream returns 403 (model not recognized).
+// Returns (false, 0) if the request cannot be built or no response arrives
+// (transport error, or modelFetchTimeout elapsed).
+// NOTE(review): that last case means a transient network failure is reported
+// the same way as a missing model — confirm this is intended.
+func (c *ModelCache) probeModel(apiKey, modelID string) (bool, int) {
+ ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout)
+ defer cancel()
+
+ // Build a minimal probe request — empty messages, max 1 token.
+ // Upstream will return either:
+ // - 403 MODEL_NOT_IN_PLAN or "model not recognized" — model doesn't exist
+ // - 200 with empty content — model exists but request was minimal
+ // - 400 bad request — model exists, request was malformed
+ probeBody := map[string]any{
+ "config": map[string]any{
+ "workingDir": ".",
+ "date": time.Now().Format("2006-01-02"),
+ "environment": "cli",
+ "structure": []string{},
+ "isGitRepo": false,
+ "currentBranch": "",
+ "mainBranch": "main",
+ "gitStatus": "",
+ "recentCommits": []string{},
+ },
+ "memory": "",
+ "taste": "",
+ "skills": "",
+ "params": map[string]any{
+ "model": modelID,
+ "messages": []map[string]any{},
+ "tools": []any{},
+ "system": "",
+ "max_tokens": 1,
+ "temperature": 0.0,
+ "stream": false,
+ },
+ "threadId": "probe",
+ }
+ // The Marshal error is discarded: the payload above is built only from
+ // maps, slices, strings, numbers and bools.
+ bodyJSON, _ := json.Marshal(probeBody)
+
+ req, err := http.NewRequestWithContext(ctx, http.MethodPost, upstreamProbeURL, bytes.NewReader(bodyJSON))
+ if err != nil {
+ return false, 0
+ }
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("Authorization", "Bearer "+apiKey)
+ req.Header.Set("x-command-code-version", version.GetCommandCodeVersion())
+ req.Header.Set("x-cli-environment", "production")
+ req.Header.Set("Accept", "application/json")
+ req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+ resp, err := c.httpClient.Do(req)
+ if err != nil {
+ return false, 0
+ }
+ defer resp.Body.Close()
+ // Only the status code matters; the body is read and thrown away
+ // (presumably so the connection can be reused — TODO confirm).
+ io.Copy(io.Discard, resp.Body)
+
+ // 403 means model not recognized — filter out.
+ // Anything else (200, 400, 429, 500) means model exists at this ID.
+ return resp.StatusCode != http.StatusForbidden, resp.StatusCode
+}
+
+// StartBackgroundRefresh spawns a goroutine that refreshes the cache once
+// right away and then again every modelCacheTTL. Refresh errors are logged and
+// the loop keeps going. The goroutine is never stopped, so call this once at
+// startup.
+func (c *ModelCache) StartBackgroundRefresh(apiKey string) {
+	refreshLoop := func() {
+		// Initial fetch
+		if firstErr := c.Refresh(apiKey); firstErr != nil {
+			log.Printf("[models] initial refresh failed (using static fallback): %v", firstErr)
+		}
+
+		tick := time.NewTicker(modelCacheTTL)
+		defer tick.Stop()
+		for range tick.C {
+			err := c.Refresh(apiKey)
+			if err == nil {
+				continue
+			}
+			log.Printf("[models] refresh failed (keeping existing cache): %v", err)
+		}
+	}
+	go refreshLoop()
+}
diff --git a/internal/proxy/modelinfo.go b/internal/proxy/modelinfo.go
new file mode 100644
index 0000000..d148728
--- /dev/null
+++ b/internal/proxy/modelinfo.go
@@ -0,0 +1,231 @@
+package proxy
+
+import (
+ "context"
+ "io"
+ "log"
+ "net/http"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/dev2k6/command-code-proxy-server/internal/api"
+)
+
+const (
+ // modelsDocURL is the public Command Code docs page that lists every model
+ // with its display name and a one-line "best for" description.
+ modelsDocURL = "https://commandcode.ai/docs/reference/cli/models"
+
+ // modelInfoFetchInterval — refresh model info every 24 hours. Descriptions
+ // change infrequently; daily refresh is plenty.
+ modelInfoFetchInterval = 24 * time.Hour
+
+ // modelInfoFetchTimeout — per-request timeout.
+ modelInfoFetchTimeout = 15 * time.Second
+)
+
+// ModelInfo describes a single model: its display name, best-for description,
+// and capabilities (e.g., "text, vision").
+type ModelInfo struct {
+ ID string `json:"id"` // matches upstream model ID (lowercase for lookup)
+ DisplayName string `json:"display_name"` // human-friendly name like "Claude Sonnet 4.6"
+ Description string `json:"description"` // one-line "best for" description
+ Capabilities string `json:"capabilities"` // comma-separated: "text", "text, vision"
+}
+
+// ModelInfoCache holds scraped model metadata with thread-safe access.
+type ModelInfoCache struct {
+ mu sync.RWMutex
+ infos map[string]ModelInfo // key: model ID (lowercase)
+ fetchedAt time.Time
+ client *http.Client
+}
+
+// NewModelInfoCache creates an empty cache.
+func NewModelInfoCache() *ModelInfoCache {
+ return &ModelInfoCache{
+ infos: make(map[string]ModelInfo),
+ client: &http.Client{Timeout: modelInfoFetchTimeout},
+ }
+}
+
+// Get looks up the scraped metadata for modelID.
+//
+// The lowercased ID is tried as an exact key first. Failing that, the last
+// "/"-separated segment of the ID is compared, via normalizeModelKey, against
+// the last segment of every stored key, and the first entry that matches is
+// returned. The boolean is false, with a zero ModelInfo, when nothing matches.
+func (c *ModelInfoCache) Get(modelID string) (ModelInfo, bool) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	lower := strings.ToLower(modelID)
+	if info, ok := c.infos[lower]; ok {
+		return info, true
+	}
+
+	// Fuzzy: last path component normalized. The query side is the same for
+	// every stored key, so normalize it once instead of once per iteration.
+	lastPath := lower
+	if i := strings.LastIndex(lower, "/"); i >= 0 {
+		lastPath = lower[i+1:]
+	}
+	want := normalizeModelKey(lastPath)
+
+	for k, info := range c.infos {
+		storedLast := k
+		if i := strings.LastIndex(k, "/"); i >= 0 {
+			storedLast = k[i+1:]
+		}
+		if normalizeModelKey(storedLast) == want {
+			return info, true
+		}
+	}
+	return ModelInfo{}, false
+}
+
+// normalizeModelKey returns s lowercased with every '-', '.' and '_' removed,
+// so that spellings such as "Kimi-K2.6" and "kimi_k26" compare equal during
+// fuzzy lookups.
+func normalizeModelKey(s string) string {
+	var kept strings.Builder
+	kept.Grow(len(s))
+	for i := 0; i < len(s); i++ {
+		switch ch := s[i]; ch {
+		case '-', '.', '_':
+			// separator: dropped
+		default:
+			kept.WriteByte(ch)
+		}
+	}
+	return strings.ToLower(kept.String())
+}
+
+// Refresh downloads the docs page at modelsDocURL, extracts model metadata
+// with scrapeModelInfo, and replaces the cache contents with the result.
+//
+// If the scrape yields no models while the cache already holds entries — as
+// happens when the page markup changes and the scraper stops matching — the
+// existing entries are kept, a warning is logged, and nil is returned.
+//
+// An error is returned when the request cannot be built or sent, when the
+// page answers with a non-200 status (*ModelInfoFetchError), or when the body
+// cannot be read. The cache is left untouched in all of those cases.
+func (c *ModelInfoCache) Refresh() error {
+	ctx, cancel := context.WithTimeout(context.Background(), modelInfoFetchTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, modelsDocURL, nil)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("User-Agent", "command-code-proxy/1.0")
+
+	resp, err := c.client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return &ModelInfoFetchError{Status: resp.StatusCode}
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return err
+	}
+
+	infos := scrapeModelInfo(string(body))
+
+	c.mu.Lock()
+	if existing := len(c.infos); len(infos) == 0 && existing > 0 {
+		// Stale descriptions beat none: don't let a failed scrape wipe data
+		// that an earlier refresh collected.
+		c.mu.Unlock()
+		log.Printf("[modelinfo] scrape returned no models; keeping %d existing entries", existing)
+		return nil
+	}
+	c.infos = infos
+	c.fetchedAt = time.Now()
+	c.mu.Unlock()
+
+	log.Printf("[modelinfo] refreshed: %d models", len(infos))
+	return nil
+}
+
+// ModelInfoFetchError is returned by ModelInfoCache.Refresh when the docs page
+// responds with a status other than 200 OK.
+type ModelInfoFetchError struct {
+	Status int // HTTP status code of the response
+}
+
+// Error reports the numeric status code, followed by the standard reason
+// phrase when net/http knows one. The number is always present so that codes
+// without a registered phrase (e.g. 520) still produce a useful message.
+func (e *ModelInfoFetchError) Error() string {
+	msg := "model info fetch failed: status " + strconv.Itoa(e.Status)
+	if text := http.StatusText(e.Status); text != "" {
+		msg += " " + text
+	}
+	return msg
+}
+
+// StartBackgroundRefresh spawns a goroutine that scrapes the docs page once
+// right away and then again every modelInfoFetchInterval. Refresh errors are
+// logged and the loop keeps going; the goroutine is never stopped.
+func (c *ModelInfoCache) StartBackgroundRefresh() {
+	refreshLoop := func() {
+		if firstErr := c.Refresh(); firstErr != nil {
+			log.Printf("[modelinfo] initial refresh failed (descriptions will be empty): %v", firstErr)
+		}
+
+		tick := time.NewTicker(modelInfoFetchInterval)
+		defer tick.Stop()
+		for range tick.C {
+			err := c.Refresh()
+			if err == nil {
+				continue
+			}
+			log.Printf("[modelinfo] refresh failed (keeping existing): %v", err)
+		}
+	}
+	go refreshLoop()
+}
+
+// scrapeModelInfo parses the docs HTML and extracts (id, name, description, capabilities)
+// for each model. Returns a map keyed by lowercase model ID.
+func scrapeModelInfo(html string) map[string]ModelInfo {
+ infos := make(map[string]ModelInfo)
+
+ // Pattern matches rows in the docs models table:
+ // model-id ...
]*>([^<]+).*? tag.
+ var realBlocks [][]string
+ for _, b := range dealBlocks {
+ if len(b[1]) > 2000 {
+ continue // too long — likely a nav menu or footer
+ }
+ if strings.Contains(b[1], ", len<2000)", len(realBlocks))
+ dealBlocks = realBlocks
+ if len(dealBlocks) == 0 {
+ return deals
+ }
+
+ codeRe := regexp.MustCompile(`]*>([^<]+)`)
+ statusRe := regexp.MustCompile(`(permanent|through\s+\w+\s+\d+,\s+\d{4})`)
+
+ for _, block := range dealBlocks {
+ if len(block) < 2 {
+ continue
+ }
+ inner := block[1]
+
+ // Find all model codes in this deal
+ codes := codeRe.FindAllStringSubmatch(inner, -1)
+ if len(codes) == 0 {
+ continue
+ }
+
+ // Find status
+ statusMatch := statusRe.FindStringSubmatch(inner)
+ status := "unknown"
+ if len(statusMatch) >= 2 {
+ status = statusMatch[1]
+ }
+
+ // Description: text after the LAST in the deal, before status marker
+ lastCodeEnd := strings.LastIndex(inner, "")
+ if lastCodeEnd < 0 {
+ continue
+ }
+ descStart := lastCodeEnd + len("")
+
+ // Find status in remaining text
+ descText := inner[descStart:]
+ statusIdx := strings.Index(descText, status)
+ if statusIdx < 0 {
+ // Status might be in nested span; take up to next
+ spanEnd := strings.Index(descText, "")
+ if spanEnd < 0 {
+ spanEnd = len(descText)
+ }
+ statusIdx = spanEnd
+ }
+ desc := descText[:statusIdx]
+ desc = stripHTMLTags(desc)
+ desc = strings.TrimSpace(desc)
+
+ mult := extractMultiplier(desc)
+
+ // Apply to all models in this deal
+ for _, c := range codes {
+ if len(c) < 2 {
+ continue
+ }
+ model := strings.TrimSpace(c[1])
+ deal := Deal{
+ Model: model,
+ Multiplier: mult,
+ Description: desc,
+ Status: status,
+ }
+ deals[strings.ToLower(model)] = deal
+ }
+ }
+
+ if len(deals) == 0 {
+ log.Printf("[pricing] WARNING: no deals parsed from %d blocks — extraction failed", len(dealBlocks))
+ }
+
+ return deals
+}
+
+// Patterns used by stripHTMLTags, compiled once at package scope rather than
+// on every call.
+var (
+	// htmlTagPattern matches "<", one or more non-">" characters, then ">".
+	htmlTagPattern = regexp.MustCompile(`<[^>]+>`)
+	// whitespaceRunPattern matches one or more consecutive whitespace characters.
+	whitespaceRunPattern = regexp.MustCompile(`\s+`)
+)
+
+// stripHTMLTags removes HTML tags from s (best-effort, no full parser) and
+// then collapses every run of whitespace into a single space. Leading and
+// trailing whitespace is collapsed but not trimmed.
+func stripHTMLTags(s string) string {
+	s = htmlTagPattern.ReplaceAllString(s, "")
+	return whitespaceRunPattern.ReplaceAllString(s, " ")
+}
+
+// Patterns used by extractMultiplier, compiled once at package scope rather
+// than on every call.
+var (
+	// multiplierTimesPattern matches a number (optionally with a decimal part)
+	// followed by optional whitespace and "x" or "×".
+	multiplierTimesPattern = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*[x×]`)
+	// multiplierPercentOffPattern matches an integer percentage followed by "off".
+	multiplierPercentOffPattern = regexp.MustCompile(`(\d+)%\s*off`)
+)
+
+// extractMultiplier pulls a multiplier out of a deal description and returns
+// it in normalized form: "<n>x" for a times-multiplier (the Unicode "×" and
+// any space before it are normalized away, so "4 ×" becomes "4x"), or
+// "<n>% off" for a percentage discount. A times-multiplier takes precedence
+// when both appear. Returns "" when desc contains neither.
+func extractMultiplier(desc string) string {
+	if m := multiplierTimesPattern.FindStringSubmatch(desc); m != nil {
+		return m[1] + "x"
+	}
+	if m := multiplierPercentOffPattern.FindStringSubmatch(desc); m != nil {
+		return m[1] + "% off"
+	}
+	return ""
+}
diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go
index cecd881..a7ebc5f 100644
--- a/internal/proxy/proxy.go
+++ b/internal/proxy/proxy.go
@@ -60,19 +60,34 @@ func normalizeFinishReason(reason string) string {
// Proxy struct
type Proxy struct {
- APIKey string
- BaseURL string
- Client *http.Client
- Debug bool
+ APIKey string
+ BaseURL string
+ Client *http.Client
+ Debug bool
+ Models *ModelCache // model list served by HandleModels
+ Pricing *PricingCache // deal cache; NewProxy hands the same instance to Models
+ ModelInfo *ModelInfoCache // display-metadata cache; NewProxy hands the same instance to Models
}
// NewProxy creates a new proxy instance
func NewProxy(apiKey string) *Proxy {
- return &Proxy{
- APIKey: apiKey,
- BaseURL: defaultBaseURL,
- Client: &http.Client{Timeout: defaultTimeout},
+ // The pricing and model-info caches are created first so the same
+ // instances can be shared between the Proxy and its ModelCache.
+ pricing := NewPricingCache()
+ modelInfo := NewModelInfoCache()
+ p := &Proxy{
+ APIKey: apiKey,
+ BaseURL: defaultBaseURL,
+ Client: &http.Client{Timeout: defaultTimeout},
+ Models: NewModelCache(pricing, modelInfo),
+ Pricing: pricing,
+ ModelInfo: modelInfo,
+ }
+ // Kick off background refreshes — don't block startup
+ pricing.StartBackgroundRefresh()
+ modelInfo.StartBackgroundRefresh()
+ // The model-list refresh sends the key as a Bearer token, so it is only
+ // started when a key was supplied.
+ if apiKey != "" {
+ p.Models.StartBackgroundRefresh(apiKey)
}
+ return p
}
// BuildRequest builds the CommandCode request body
@@ -95,6 +110,12 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody
tools := ConvertTools(openAIReq.Tools)
+ // Map reasoning effort
+ var reasoning string
+ if openAIReq.Reasoning != nil {
+ reasoning = string(*openAIReq.Reasoning)
+ }
+
ccBody := api.CCRequestBody{
Config: api.CCConfig{
WorkingDir: ".",
@@ -118,6 +139,7 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody
MaxTokens: maxTokens,
Temperature: temperature,
Stream: true,
+ Reasoning: reasoning,
},
ThreadID: uuid.New().String(),
}
@@ -668,30 +690,79 @@ func responseItemsToMessages(items []any) []api.OpenAIMessage {
// HandleModels handles the /v1/models endpoint
func (p *Proxy) HandleModels(w http.ResponseWriter, r *http.Request) {
+	// Serve whatever the cache has right now; Get falls back to the static
+	// list while the cache is still empty.
+	cached := p.Models.Get()
+
+	// Opportunistically refresh a stale cache so a later request sees fresh
+	// data. Refresh returns immediately when another refresh is already in
+	// flight, so there is no need to peek at the cache's fetchingNow flag
+	// here — reading it without holding the cache mutex is a data race.
+	// Without an API key the upstream request cannot authenticate, so the
+	// attempt is skipped, as NewProxy does for the periodic refresh.
+	if p.APIKey != "" && p.Models.IsStale() {
+		go func() {
+			if err := p.Models.Refresh(p.APIKey); err != nil {
+				log.Printf("[models] background refresh failed: %v", err)
+			}
+		}()
+	}
+
models := api.OpenAIModelList{
Object: "list",
- Data: []api.OpenAIModel{
- // MoonshotAI
- {ID: "moonshotai/Kimi-K2.6", Object: "model", Created: 0, OwnedBy: "moonshotai"},
- {ID: "moonshotai/Kimi-K2.5", Object: "model", Created: 0, OwnedBy: "moonshotai"},
- // ZhipuAI
- {ID: "zai-org/GLM-5.1", Object: "model", Created: 0, OwnedBy: "zhipuai"},
- {ID: "zai-org/GLM-5", Object: "model", Created: 0, OwnedBy: "zhipuai"},
- // MiniMaxAI
- {ID: "MiniMaxAI/MiniMax-M2.7", Object: "model", Created: 0, OwnedBy: "minimaxai"},
- {ID: "MiniMaxAI/MiniMax-M2.5", Object: "model", Created: 0, OwnedBy: "minimaxai"},
- // DeepSeek
- {ID: "deepseek/deepseek-v4-pro", Object: "model", Created: 0, OwnedBy: "deepseek"},
- {ID: "deepseek/deepseek-v4-flash", Object: "model", Created: 0, OwnedBy: "deepseek"},
- // Qwen
- {ID: "Qwen/Qwen3.6-Max-Preview", Object: "model", Created: 0, OwnedBy: "qwen"},
- {ID: "Qwen/Qwen3.6-Plus", Object: "model", Created: 0, OwnedBy: "qwen"},
- // StepFun
- {ID: "stepfun/Step-3.5-Flash", Object: "model", Created: 0, OwnedBy: "stepfun"},
- // Google
- {ID: "google/gemini-3.1-flash-lite", Object: "model", Created: 0, OwnedBy: "google"},
- },
+ Data: cached,
}
w.Header().Set("Content-Type", "application/json")
json.NewEncoder(w).Encode(models)
}
+
+// getStaticModels builds the hardcoded fallback model list that is served
+// while the dynamic cache is empty. Every entry has Object "model", Created 0,
+// an owner derived from the ID by inferOwner, and a context length looked up
+// through ContextLengthFor, so contextmap.go stays the single source of
+// window sizes.
+//
+// NOTE(review): moonshotai/Kimi-K2.6, moonshotai/Kimi-K2.5 and
+// MiniMaxAI/MiniMax-M3-Promo have entries in contextLengthMap but are not
+// listed here — confirm the omission is deliberate.
+func getStaticModels() []api.OpenAIModel {
+	fallbackIDs := [...]string{
+		// MoonshotAI
+		"moonshotai/Kimi-K2.7-Code",
+		"moonshotai/Kimi-K2.7-Code-Highspeed",
+		// ZhipuAI
+		"zai-org/GLM-5.2",
+		"zai-org/GLM-5.1",
+		"zai-org/GLM-5",
+		// MiniMaxAI
+		"MiniMaxAI/MiniMax-M3",
+		"MiniMaxAI/MiniMax-M2.7",
+		"MiniMaxAI/MiniMax-M2.5",
+		// DeepSeek
+		"deepseek/deepseek-v4-pro",
+		"deepseek/deepseek-v4-flash",
+		// Qwen
+		"Qwen/Qwen3.6-Max-Preview",
+		"Qwen/Qwen3.6-Plus",
+		"Qwen/Qwen3.7-Max",
+		"Qwen/Qwen3.7-Plus",
+		// StepFun
+		"stepfun/Step-3.7-Flash",
+		"stepfun/Step-3.5-Flash",
+		// Xiaomi
+		"xiaomi/mimo-v2.5-pro",
+		"xiaomi/mimo-v2.5",
+		// NVIDIA
+		"nvidia/nemotron-3-ultra-550b-a55b",
+		// Anthropic
+		"claude-sonnet-4-6",
+		"claude-fable-5",
+		"claude-opus-4-8",
+		"claude-opus-4-7",
+		"claude-opus-4-6",
+		"claude-haiku-4-5",
+		"claude-haiku-4-5-20251001",
+		// OpenAI
+		"gpt-5.5",
+		"gpt-5.4",
+		"gpt-5.3-codex",
+		"gpt-5.4-mini",
+		// Google
+		"google/gemini-3.5-flash",
+		"google/gemini-3.1-flash-lite",
+	}
+
+	list := make([]api.OpenAIModel, len(fallbackIDs))
+	for pos, modelID := range fallbackIDs {
+		entry := &list[pos]
+		entry.ID = modelID
+		entry.Object = "model"
+		entry.OwnedBy = inferOwner(modelID)
+		entry.ContextLength = ContextLengthFor(modelID)
+	}
+	return list
+}
+
+// inferOwner returns the provider prefix of a model ID — the text before its
+// first "/" (e.g. "zai-org" for "zai-org/GLM-5.2") — or "unknown" when the ID
+// contains no "/".
+func inferOwner(id string) string {
+	for pos := 0; pos < len(id); pos++ {
+		if id[pos] == '/' {
+			return id[:pos]
+		}
+	}
+	return "unknown"
+}