diff --git a/.gitignore b/.gitignore index d90563d..8ae3228 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,9 @@ # OS files .DS_Store Thumbs.db + +# Build artifacts & local-only scripts +bin/command-code-proxy-updated +bin/command-code-proxy-new.exe +run-proxy.bat +run-proxy.vbs diff --git a/bin/command-code-proxy.exe b/bin/command-code-proxy.exe index 7825553..27bb947 100644 Binary files a/bin/command-code-proxy.exe and b/bin/command-code-proxy.exe differ diff --git a/internal/api/commandcode.go b/internal/api/commandcode.go index 6e1f0f8..e04f077 100644 --- a/internal/api/commandcode.go +++ b/internal/api/commandcode.go @@ -33,6 +33,7 @@ type CCChatParams struct { MaxTokens int `json:"max_tokens"` Temperature float64 `json:"temperature"` Stream bool `json:"stream"` + Reasoning string `json:"reasoning,omitempty"` } type CCConfig struct { diff --git a/internal/api/openai.go b/internal/api/openai.go index 8e76198..7e77953 100644 --- a/internal/api/openai.go +++ b/internal/api/openai.go @@ -58,8 +58,17 @@ type OpenAIChatRequest struct { PresencePenalty *float64 `json:"presence_penalty,omitempty"` FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"` User string `json:"user,omitempty"` + Reasoning *ReasoningEffort `json:"reasoning,omitempty"` } +type ReasoningEffort string + +const ( + ReasoningEffortLow ReasoningEffort = "low" + ReasoningEffortMedium ReasoningEffort = "medium" + ReasoningEffortHigh ReasoningEffort = "high" +) + type OpenAIResponsesRequest struct { Model string `json:"model"` Input any `json:"input"` @@ -131,10 +140,26 @@ type OpenAIError struct { } type OpenAIModel struct { - ID string `json:"id"` - Object string `json:"object"` - Created int64 `json:"created"` - OwnedBy string `json:"owned_by"` + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` + ContextLength int `json:"context_length,omitempty"` + Pricing *ModelPricing `json:"pricing,omitempty"` + // Descriptive metadata from Command Code's docs. + // Populated dynamically — display_name is "Claude Sonnet 4.6" etc, + // description is the one-line "best for" summary. + DisplayName string `json:"display_name,omitempty"` + Description string `json:"description,omitempty"` + Capabilities string `json:"capabilities,omitempty"` +} + +// ModelPricing describes deal/pricing information for a model. +// Only populated when an active deal exists. +type ModelPricing struct { + Multiplier string `json:"multiplier,omitempty"` // e.g. "4x", "2x", "99% off" + Description string `json:"description,omitempty"` // human-readable description + Status string `json:"status,omitempty"` // "permanent" or expiration date } type OpenAIModelList struct { diff --git a/internal/proxy/contextmap.go b/internal/proxy/contextmap.go new file mode 100644 index 0000000..e8c2598 --- /dev/null +++ b/internal/proxy/contextmap.go @@ -0,0 +1,112 @@ +package proxy + +// contextLengthMap maps model IDs to their context window size in tokens. +// Command Code's /v1/models endpoint does not expose context windows, so we +// maintain a local override. Unknown models fall back to 128K (safe default). +// +// Sources: +// - Upstream provider documentation (Anthropic, OpenAI, Google, etc.) +// - Command Code CLI docs (https://commandcode.ai/docs/reference/cli/models) +// - Model release notes +var contextLengthMap = map[string]int{ + // MoonshotAI Kimi + "moonshotai/Kimi-K2.7-Code": 262144, + "moonshotai/Kimi-K2.7-Code-Highspeed": 262144, + "moonshotai/Kimi-K2.6": 262144, + "moonshotai/Kimi-K2.5": 262144, + + // ZhipuAI GLM + "zai-org/GLM-5.2": 1048576, // 1M + "zai-org/GLM-5.1": 202752, + "zai-org/GLM-5": 202752, + + // MiniMax + "MiniMaxAI/MiniMax-M3": 1000000, // 1M + "MiniMaxAI/MiniMax-M3-Promo": 1000000, // 1M + "MiniMaxAI/MiniMax-M2.7": 204800, + "MiniMaxAI/MiniMax-M2.5": 204800, + + // DeepSeek + "deepseek/deepseek-v4-pro": 1000000, // 1M + "deepseek/deepseek-v4-flash": 1000000, // 1M + + // Qwen + "Qwen/Qwen3.6-Max-Preview": 1048576, // 1M + "Qwen/Qwen3.6-Plus": 1048576, // 1M + "Qwen/Qwen3.7-Max": 1048576, // 1M + "Qwen/Qwen3.7-Plus": 1048576, // 1M + + // StepFun + "stepfun/Step-3.7-Flash": 262144, + "stepfun/Step-3.5-Flash": 262144, + + // Xiaomi MiMo + "xiaomi/mimo-v2.5-pro": 1048576, // 1M + "xiaomi/mimo-v2.5": 1048576, // 1M + + // NVIDIA Nemotron + "nvidia/nemotron-3-ultra-550b-a55b": 131072, + + // Anthropic Claude + "claude-sonnet-4-6": 1000000, // 1M + "claude-fable-5": 1000000, // 1M + "claude-opus-4-8": 1000000, // 1M + "claude-opus-4-7": 1000000, // 1M + "claude-opus-4-6": 200000, + "claude-haiku-4-5": 200000, + "claude-haiku-4-5-20251001": 200000, + + // OpenAI GPT + "gpt-5.5": 1050000, // ~1M with reasoning overhead + "gpt-5.4": 1050000, + "gpt-5.3-codex": 400000, + "gpt-5.4-mini": 400000, + + // Google Gemini + "google/gemini-3.5-flash": 1048576, // 1M + "google/gemini-3.1-flash-lite": 1048576, // 1M +} + +// defaultContextLength is returned for unknown models when we can't determine +// their context window. Conservative — better to over-estimate and trigger +// compression early than to under-estimate and blow the window. +const defaultContextLength = 131072 // 128K + +// tasteOneModelID is Command Code's internal model that ships free with all plans. +// Not exposed via upstream /v1/models API; hardcoded here so it's discoverable +// through the proxy. +const tasteOneModelID = "taste-1" + +// tasteOneContextLength is the context window for taste-1. Set to a reasonable +// coding-agent window — tune if Command Code publishes a different number. +const tasteOneContextLength = 262144 // 256K + +// ContextLengthFor returns the context window for a model ID. +// Falls back to defaultContextLength if the model is not in the map. +func ContextLengthFor(modelID string) int { + if n, ok := contextLengthMap[modelID]; ok { + return n + } + // Try matching by suffix (model names can have provider prefixes we don't track) + for knownID, n := range contextLengthMap { + if modelMatches(modelID, knownID) { + return n + } + } + return defaultContextLength +} + +// modelMatches does fuzzy matching — strips common prefixes and compares. +// e.g. "minimax/MiniMax-M3" matches "MiniMaxAI/MiniMax-M3" +func modelMatches(query, known string) bool { + // Extract model name after the last "/" + getSuffix := func(s string) string { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '/' { + return s[i+1:] + } + } + return s + } + return getSuffix(query) == getSuffix(known) +} diff --git a/internal/proxy/model.go b/internal/proxy/model.go index 773d247..8621a0e 100644 --- a/internal/proxy/model.go +++ b/internal/proxy/model.go @@ -13,6 +13,12 @@ func MapModel(name string) string { return "MiniMaxAI/MiniMax-M2.7" case "minimax-m2.5", "minimax2.5", "minimax": return "MiniMaxAI/MiniMax-M2.5" + case "minimax-m3", "minimax3": + return "MiniMaxAI/MiniMax-M3" + case "minimax-m3-promo", "minimax3-promo": + return "MiniMaxAI/MiniMax-M3-Promo" + case "glm-5.2", "glm-52": + return "zai-org/GLM-5.2" case "glm-5.1": return "zai-org/GLM-5.1" case "glm-5": @@ -21,14 +27,52 @@ func MapModel(name string) string { return "moonshotai/Kimi-K2.6" case "kimi-k2.5", "kimi2.5": return "moonshotai/Kimi-K2.5" + case "kimi-k2.7-code", "kimi2.7-code": + return "moonshotai/Kimi-K2.7-Code" + case "kimi-k2.7-code-highspeed", "kimi2.7-code-highspeed": + return "moonshotai/Kimi-K2.7-Code-Highspeed" case "qwen-3.6-max-preview", "qwen3.6-max": return "Qwen/Qwen3.6-Max-Preview" case "qwen-3.6-plus", "qwen3.6-plus", "qwen3.6": return "Qwen/Qwen3.6-Plus" + case "qwen-3.7-max", "qwen3.7-max": + return "Qwen/Qwen3.7-Max" + case "qwen-3.7-plus", "qwen3.7-plus", "qwen3.7": + return "Qwen/Qwen3.7-Plus" case "step-3.5-flash", "step3.5": return "stepfun/Step-3.5-Flash" + case "step-3.7-flash", "step3.7": + return "stepfun/Step-3.7-Flash" + case "mimo-v2.5-pro", "mimo2.5-pro": + return "xiaomi/mimo-v2.5-pro" + case "mimo-v2.5", "mimo2.5", "mimo": + return "xiaomi/mimo-v2.5" + case "nemotron-3-ultra", "nemotron": + return "nvidia/nemotron-3-ultra-550b-a55b" + case "claude-sonnet-4-6", "sonnet-4-6", "sonnet": + return "claude-sonnet-4-6" + case "claude-fable-5", "fable-5", "fable": + return "claude-fable-5" + case "claude-opus-4-8", "opus-4-8", "opus": + return "claude-opus-4-8" + case "claude-opus-4-7", "opus-4-7": + return "claude-opus-4-7" + case "claude-opus-4-6", "opus-4-6": + return "claude-opus-4-6" + case "claude-haiku-4-5", "haiku-4-5", "haiku": + return "claude-haiku-4-5" + case "gpt-5.5": + return "gpt-5.5" + case "gpt-5.4": + return "gpt-5.4" + case "gpt-5.3-codex", "codex": + return "gpt-5.3-codex" + case "gpt-5.4-mini", "gpt-mini": + return "gpt-5.4-mini" case "gemini-3.1-flash-lite", "gemini-flash-lite": return "google/gemini-3.1-flash-lite" + case "gemini-3.5-flash", "gemini-flash": + return "google/gemini-3.5-flash" default: return name // pass through as-is } diff --git a/internal/proxy/modelfetch.go b/internal/proxy/modelfetch.go new file mode 100644 index 0000000..1d5e718 --- /dev/null +++ b/internal/proxy/modelfetch.go @@ -0,0 +1,311 @@ +package proxy + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "strings" + "sync" + "time" + + "github.com/dev2k6/command-code-proxy-server/internal/api" + "github.com/dev2k6/command-code-proxy-server/internal/version" +) + +const ( + // upstreamModelsURL is Command Code's Provider API models endpoint + upstreamModelsURL = "https://api.commandcode.ai/provider/v1/models" + + // upstreamProbeURL is the chat endpoint we use to probe whether a model is + // actually reachable. We send a minimal request — if it returns non-403 + // (e.g., 200 with content, or 400 for bad request), the model exists. + upstreamProbeURL = "https://api.commandcode.ai/alpha/generate" + + // modelCacheTTL is how often we refresh the model list from upstream + modelCacheTTL = 6 * time.Hour + + // modelFetchTimeout is the per-request timeout for upstream model fetching + modelFetchTimeout = 10 * time.Second + + // modelProbeConcurrency caps how many models we probe in parallel. + // Upstream gets cranky if we hammer with 30 concurrent requests. + modelProbeConcurrency = 4 +) + +// UpstreamModel represents a single model in the upstream Command Code API +// response (OpenAI-compatible /v1/models format). +type UpstreamModel struct { + ID string `json:"id"` + Object string `json:"object"` + Created int64 `json:"created"` + OwnedBy string `json:"owned_by"` +} + +// UpstreamModelList is the response wrapper for /v1/models +type UpstreamModelList struct { + Object string `json:"object"` + Data []UpstreamModel `json:"data"` +} + +// ModelCache holds the dynamically-fetched model list with thread-safe access. +type ModelCache struct { + mu sync.RWMutex + models []api.OpenAIModel + fetchedAt time.Time + httpClient *http.Client + fetchingNow bool // prevents concurrent refreshes + pricing *PricingCache + modelInfo *ModelInfoCache +} + +// NewModelCache creates an empty cache. Call Refresh() to populate it. +func NewModelCache(pricing *PricingCache, info *ModelInfoCache) *ModelCache { + return &ModelCache{ + models: nil, + pricing: pricing, + modelInfo: info, + httpClient: &http.Client{ + Timeout: modelFetchTimeout, + }, + } +} + +// Get returns a copy of the current cached models. If cache is empty, +// returns the static fallback list from getStaticModels(). +func (c *ModelCache) Get() []api.OpenAIModel { + c.mu.RLock() + defer c.mu.RUnlock() + if len(c.models) == 0 { + // Return static fallback enriched with model info (if available) + static := getStaticModels() + if c.modelInfo != nil { + for i := range static { + c.modelInfo.AttachInfo(&static[i]) + } + } + return static + } + // Return a copy so callers can't mutate cache state + out := make([]api.OpenAIModel, len(c.models)) + copy(out, c.models) + return out +} + +// IsStale returns true if the cache needs refreshing. +func (c *ModelCache) IsStale() bool { + c.mu.RLock() + defer c.mu.RUnlock() + return time.Since(c.fetchedAt) > modelCacheTTL || len(c.models) == 0 +} + +// Refresh fetches the upstream model list and rebuilds the cache. +// Models that 403 upstream (model not recognized) are filtered out. +// On error, logs but keeps the existing cache (or static fallback if empty). +func (c *ModelCache) Refresh(apiKey string) error { + c.mu.Lock() + if c.fetchingNow { + c.mu.Unlock() + return nil // another goroutine is already refreshing + } + c.fetchingNow = true + c.mu.Unlock() + + defer func() { + c.mu.Lock() + c.fetchingNow = false + c.mu.Unlock() + }() + + models, err := c.fetchAndValidate(apiKey) + if err != nil { + return err + } + + c.mu.Lock() + c.models = models + c.fetchedAt = time.Now() + c.mu.Unlock() + + log.Printf("[models] refreshed cache: %d models (validated against upstream)", len(models)) + return nil +} + +// fetchAndValidate fetches the upstream model list, then probes each model +// to verify it's reachable. Models that 403 upstream are filtered out. +func (c *ModelCache) fetchAndValidate(apiKey string) ([]api.OpenAIModel, error) { + ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, upstreamModelsURL, nil) + if err != nil { + return nil, fmt.Errorf("build request: %w", err) + } + req.Header.Set("Authorization", "Bearer "+apiKey) + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", "command-code-proxy/1.0") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch upstream: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("upstream status %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + + var upstream UpstreamModelList + if err := json.NewDecoder(resp.Body).Decode(&upstream); err != nil { + return nil, fmt.Errorf("decode upstream: %w", err) + } + + // Probe each model in parallel to filter out non-existent ones. + probeResults := c.probeModels(apiKey, upstream.Data) + + // Build the validated model list. + enriched := make([]api.OpenAIModel, 0, len(probeResults)) + for _, pr := range probeResults { + if !pr.reachable { + log.Printf("[models] filtered out unreachable model: %s (probe status: %d)", pr.model.ID, pr.status) + continue + } + m := api.OpenAIModel{ + ID: pr.model.ID, + Object: pr.model.Object, + Created: pr.model.Created, + OwnedBy: pr.model.OwnedBy, + ContextLength: ContextLengthFor(pr.model.ID), + } + // Attach pricing/deal info if available + if c.pricing != nil { + if deal, ok := c.pricing.GetDeal(pr.model.ID); ok { + m.Pricing = &api.ModelPricing{ + Multiplier: deal.Multiplier, + Description: deal.Description, + Status: deal.Status, + } + } + } + // Attach display name + description if available + if c.modelInfo != nil { + c.modelInfo.AttachInfo(&m) + } + enriched = append(enriched, m) + } + return enriched, nil +} + +// probeResult holds the result of probing a single model. +type probeResult struct { + model UpstreamModel + reachable bool + status int +} + +// probeModels probes each model in parallel to verify upstream reachability. +// Returns results in input order. +func (c *ModelCache) probeModels(apiKey string, models []UpstreamModel) []probeResult { + results := make([]probeResult, len(models)) + sem := make(chan struct{}, modelProbeConcurrency) + var wg sync.WaitGroup + + for i, m := range models { + wg.Add(1) + sem <- struct{}{} + go func(idx int, model UpstreamModel) { + defer wg.Done() + defer func() { <-sem }() + reachable, status := c.probeModel(apiKey, model.ID) + results[idx] = probeResult{model: model, reachable: reachable, status: status} + }(i, m) + } + wg.Wait() + return results +} + +// probeModel sends a minimal chat request to verify a model is reachable. +// Returns (true, status) if the model exists (any non-403 response). +// Returns (false, status) if upstream returns 403 (model not recognized). +func (c *ModelCache) probeModel(apiKey, modelID string) (bool, int) { + ctx, cancel := context.WithTimeout(context.Background(), modelFetchTimeout) + defer cancel() + + // Build a minimal probe request — empty messages, max 1 token. + // Upstream will return either: + // - 403 MODEL_NOT_IN_PLAN or "model not recognized" — model doesn't exist + // - 200 with empty content — model exists but request was minimal + // - 400 bad request — model exists, request was malformed + probeBody := map[string]any{ + "config": map[string]any{ + "workingDir": ".", + "date": time.Now().Format("2006-01-02"), + "environment": "cli", + "structure": []string{}, + "isGitRepo": false, + "currentBranch": "", + "mainBranch": "main", + "gitStatus": "", + "recentCommits": []string{}, + }, + "memory": "", + "taste": "", + "skills": "", + "params": map[string]any{ + "model": modelID, + "messages": []map[string]any{}, + "tools": []any{}, + "system": "", + "max_tokens": 1, + "temperature": 0.0, + "stream": false, + }, + "threadId": "probe", + } + bodyJSON, _ := json.Marshal(probeBody) + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, upstreamProbeURL, bytes.NewReader(bodyJSON)) + if err != nil { + return false, 0 + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Authorization", "Bearer "+apiKey) + req.Header.Set("x-command-code-version", version.GetCommandCodeVersion()) + req.Header.Set("x-cli-environment", "production") + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", "command-code-proxy/1.0") + + resp, err := c.httpClient.Do(req) + if err != nil { + return false, 0 + } + defer resp.Body.Close() + io.Copy(io.Discard, resp.Body) + + // 403 means model not recognized — filter out. + // Anything else (200, 400, 429, 500) means model exists at this ID. + return resp.StatusCode != http.StatusForbidden, resp.StatusCode +} + +// StartBackgroundRefresh launches a goroutine that periodically refreshes +// the cache. Safe to call once at startup. +func (c *ModelCache) StartBackgroundRefresh(apiKey string) { + go func() { + // Initial fetch + if err := c.Refresh(apiKey); err != nil { + log.Printf("[models] initial refresh failed (using static fallback): %v", err) + } + + ticker := time.NewTicker(modelCacheTTL) + defer ticker.Stop() + for range ticker.C { + if err := c.Refresh(apiKey); err != nil { + log.Printf("[models] refresh failed (keeping existing cache): %v", err) + } + } + }() +} diff --git a/internal/proxy/modelinfo.go b/internal/proxy/modelinfo.go new file mode 100644 index 0000000..d148728 --- /dev/null +++ b/internal/proxy/modelinfo.go @@ -0,0 +1,231 @@ +package proxy + +import ( + "context" + "io" + "log" + "net/http" + "regexp" + "strings" + "sync" + "time" + + "github.com/dev2k6/command-code-proxy-server/internal/api" +) + +const ( + // modelsDocURL is the public Command Code docs page that lists every model + // with its display name and a one-line "best for" description. + modelsDocURL = "https://commandcode.ai/docs/reference/cli/models" + + // modelInfoFetchInterval — refresh model info every 24 hours. Descriptions + // change infrequently; daily refresh is plenty. + modelInfoFetchInterval = 24 * time.Hour + + // modelInfoFetchTimeout — per-request timeout. + modelInfoFetchTimeout = 15 * time.Second +) + +// ModelInfo describes a single model: its display name, best-for description, +// and capabilities (e.g., "text, vision"). +type ModelInfo struct { + ID string `json:"id"` // matches upstream model ID (lowercase for lookup) + DisplayName string `json:"display_name"` // human-friendly name like "Claude Sonnet 4.6" + Description string `json:"description"` // one-line "best for" description + Capabilities string `json:"capabilities"` // comma-separated: "text", "text, vision" +} + +// ModelInfoCache holds scraped model metadata with thread-safe access. +type ModelInfoCache struct { + mu sync.RWMutex + infos map[string]ModelInfo // key: model ID (lowercase) + fetchedAt time.Time + client *http.Client +} + +// NewModelInfoCache creates an empty cache. +func NewModelInfoCache() *ModelInfoCache { + return &ModelInfoCache{ + infos: make(map[string]ModelInfo), + client: &http.Client{Timeout: modelInfoFetchTimeout}, + } +} + +// Get returns the model info for a given ID, or zero-value if unknown. +// Tries exact match first, then fuzzy match by name. +func (c *ModelInfoCache) Get(modelID string) (ModelInfo, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + lower := strings.ToLower(modelID) + if info, ok := c.infos[lower]; ok { + return info, true + } + // Fuzzy: last path component normalized + lastPath := lower + if i := strings.LastIndex(lower, "/"); i >= 0 { + lastPath = lower[i+1:] + } + for k, info := range c.infos { + // Try matching last path of stored key + storedLast := k + if i := strings.LastIndex(k, "/"); i >= 0 { + storedLast = k[i+1:] + } + if normalizeModelKey(lastPath) == normalizeModelKey(storedLast) { + return info, true + } + } + return ModelInfo{}, false +} + +// normalizeModelKey strips hyphens, dots, and lowercases for fuzzy comparison. +func normalizeModelKey(s string) string { + s = strings.ReplaceAll(s, "-", "") + s = strings.ReplaceAll(s, ".", "") + s = strings.ReplaceAll(s, "_", "") + return strings.ToLower(s) +} + +// Refresh scrapes the docs page and rebuilds the cache. +func (c *ModelInfoCache) Refresh() error { + ctx, cancel := context.WithTimeout(context.Background(), modelInfoFetchTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, modelsDocURL, nil) + if err != nil { + return err + } + req.Header.Set("User-Agent", "command-code-proxy/1.0") + + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return &ModelInfoFetchError{Status: resp.StatusCode} + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + infos := scrapeModelInfo(string(body)) + + c.mu.Lock() + c.infos = infos + c.fetchedAt = time.Now() + c.mu.Unlock() + + log.Printf("[modelinfo] refreshed: %d models", len(infos)) + return nil +} + +// ModelInfoFetchError returned on non-200 from upstream docs page. +type ModelInfoFetchError struct { + Status int +} + +func (e *ModelInfoFetchError) Error() string { + return "model info fetch failed: status " + http.StatusText(e.Status) +} + +// StartBackgroundRefresh launches a goroutine that periodically refreshes info. +func (c *ModelInfoCache) StartBackgroundRefresh() { + go func() { + if err := c.Refresh(); err != nil { + log.Printf("[modelinfo] initial refresh failed (descriptions will be empty): %v", err) + } + ticker := time.NewTicker(modelInfoFetchInterval) + defer ticker.Stop() + for range ticker.C { + if err := c.Refresh(); err != nil { + log.Printf("[modelinfo] refresh failed (keeping existing): %v", err) + } + } + }() +} + +// scrapeModelInfo parses the docs HTML and extracts (id, name, description, capabilities) +// for each model. Returns a map keyed by lowercase model ID. +func scrapeModelInfo(html string) map[string]ModelInfo { + infos := make(map[string]ModelInfo) + + // Pattern matches rows in the docs models table: + // model-id ... Name best-for description capabilities + rowPattern := regexp.MustCompile( + `]*>([^<]+).*?]*>\s*([A-Z][^<]+?)\s*\s*]*>([^<]+?)\s*]*>([^<]+?)`, + ) + matches := rowPattern.FindAllStringSubmatch(html, -1) + for _, m := range matches { + if len(m) < 5 { + continue + } + id := strings.TrimSpace(m[1]) + name := strings.TrimSpace(m[2]) + desc := strings.TrimSpace(m[3]) + caps := strings.TrimSpace(m[4]) + + // Skip placeholder/empty + if strings.HasPrefix(id, "-") || id == "" { + continue + } + if strings.Contains(desc, "&") { + desc = strings.ReplaceAll(desc, "&", "&") + } + + infos[strings.ToLower(id)] = ModelInfo{ + ID: id, + DisplayName: name, + Description: desc, + Capabilities: caps, + } + } + + if len(infos) == 0 { + log.Printf("[modelinfo] WARNING: no models parsed from docs page — HTML may have changed") + } + return infos +} + +// AttachInfo enriches an OpenAIModel with display name and description +// if available in the cache. Mutates the model in-place. +func (c *ModelInfoCache) AttachInfo(model *api.OpenAIModel) { + if c == nil || model == nil { + return + } + info, ok := c.Get(model.ID) + if !ok { + // Try hardcoded fallbacks for models not in the docs table + if fallback, exists := hardcodedInfo[strings.ToLower(model.ID)]; exists { + model.DisplayName = fallback.DisplayName + model.Description = fallback.Description + model.Capabilities = fallback.Capabilities + } + return + } + model.DisplayName = info.DisplayName + model.Description = info.Description + if info.Capabilities != "" { + model.Capabilities = info.Capabilities + } +} + +// hardcodedInfo provides descriptions for models that exist upstream but aren't +// listed in the public docs page. These are kept minimal and conservative. +var hardcodedInfo = map[string]ModelInfo{ + "claude-haiku-4-5-20251001": { + ID: "claude-haiku-4-5-20251001", + DisplayName: "Claude Haiku 4.5", + Description: "fast, compact model for high-throughput tasks", + Capabilities: "text, vision", + }, + "zai-org/glm-5.2": { + ID: "zai-org/GLM-5.2", + DisplayName: "GLM 5.2", + Description: "extended-context autonomous coding agent", + Capabilities: "text", + }, +} diff --git a/internal/proxy/pricing.go b/internal/proxy/pricing.go new file mode 100644 index 0000000..48481ec --- /dev/null +++ b/internal/proxy/pricing.go @@ -0,0 +1,305 @@ +package proxy + +import ( + "context" + "io" + "log" + "net/http" + "regexp" + "strings" + "sync" + "time" +) + +const ( + // pricingURL is the public pricing page. We scrape deals from it. + pricingURL = "https://commandcode.ai/docs/resources/pricing-limits" + + // pricingFetchInterval is how often we re-scrape pricing/deals. + pricingFetchInterval = 24 * time.Hour + + // pricingFetchTimeout is the per-request timeout. + pricingFetchTimeout = 15 * time.Second +) + +// Deal represents a single pricing deal/discount on a model. +type Deal struct { + Model string `json:"model"` // model identifier (matches upstream ID) + Multiplier string `json:"multiplier"` // e.g. "4x", "2x", "99% off" + Description string `json:"description"` // human-readable description + Status string `json:"status"` // "permanent" or expiration date +} + +// PricingCache holds scraped deal data with thread-safe access. +type PricingCache struct { + mu sync.RWMutex + deals map[string]Deal // key: model identifier (lowercase) + fetchedAt time.Time + client *http.Client +} + +// NewPricingCache creates an empty pricing cache. +func NewPricingCache() *PricingCache { + return &PricingCache{ + deals: make(map[string]Deal), + client: &http.Client{Timeout: pricingFetchTimeout}, + } +} + +// GetDeal returns the deal for a model, or zero-value Deal if no deal applies. +// Comparison is case-insensitive and matches: +// - exact ID match +// - last path component (e.g. "Qwen/Qwen3.7-Max" matches "qwen-3.7-max") +// - substring match (e.g. "nvidia/nemotron-3-ultra-550b-a55b" matches "nemotron-3-ultra") +func (c *PricingCache) GetDeal(modelID string) (Deal, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + lowerID := strings.ToLower(modelID) + + // Try exact match first + if d, ok := c.deals[lowerID]; ok { + return d, true + } + + // Try fuzzy matching against all deal keys. + for k, d := range c.deals { + // Extract last path component of model ID + lastPath := lowerID + if i := strings.LastIndex(lowerID, "/"); i >= 0 { + lastPath = lowerID[i+1:] + } + + // Match strategies (in priority order): + // 1. last path equals key + // 2. last path contains key (e.g., "qwen3.7-max" contains "qwen-3.7-max") + // OR key contains last path (e.g., "minimax-m3" contains "minimax") + // 3. Normalize hyphens <-> dots + if lastPath == k { + return d, true + } + if strings.Contains(lastPath, k) || strings.Contains(k, lastPath) { + return d, true + } + // Try replacing dots <-> dashes + alt1 := strings.ReplaceAll(lastPath, ".", "-") + alt2 := strings.ReplaceAll(lastPath, "-", ".") + if alt1 == k || alt2 == k { + return d, true + } + if strings.Contains(alt1, k) || strings.Contains(alt2, k) { + return d, true + } + // Try replacing dashes with nothing (e.g., "qwen-3.7-max" matches "qwen3.7-max") + stripLast := strings.ReplaceAll(lastPath, "-", "") + stripKey := strings.ReplaceAll(k, "-", "") + if stripLast == stripKey { + return d, true + } + } + return Deal{}, false +} + +// Refresh scrapes the pricing page and rebuilds the deal cache. +// On error, logs but keeps the existing cache. +func (c *PricingCache) Refresh() error { + ctx, cancel := context.WithTimeout(context.Background(), pricingFetchTimeout) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, pricingURL, nil) + if err != nil { + return err + } + req.Header.Set("User-Agent", "command-code-proxy/1.0") + + resp, err := c.client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return &PricingFetchError{Status: resp.StatusCode, Body: strings.TrimSpace(string(body))} + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + deals := scrapeDeals(string(body)) + + c.mu.Lock() + c.deals = deals + c.fetchedAt = time.Now() + c.mu.Unlock() + + log.Printf("[pricing] refreshed: %d deals", len(deals)) + return nil +} + +// StartBackgroundRefresh launches a goroutine that periodically refreshes pricing. +func (c *PricingCache) StartBackgroundRefresh() { + go func() { + if err := c.Refresh(); err != nil { + log.Printf("[pricing] initial refresh failed (deals will be empty): %v", err) + } + ticker := time.NewTicker(pricingFetchInterval) + defer ticker.Stop() + for range ticker.C { + if err := c.Refresh(); err != nil { + log.Printf("[pricing] refresh failed (keeping existing deals): %v", err) + } + } + }() +} + +// PricingFetchError is returned when the upstream pricing page returns a non-200. +type PricingFetchError struct { + Status int + Body string +} + +func (e *PricingFetchError) Error() string { + return "pricing fetch failed: status " + http.StatusText(e.Status) +} + +// scrapeDeals parses the HTML for DEAL blocks and extracts model + description. +// This is intentionally lenient — Command Code may change the HTML structure, +// and we'd rather get partial data than no data. +// +// Robustness strategy: +// - Multiple regex patterns tried in order +// - Falls back to single-model extraction if multi-model match fails +// - Returns empty map on any parse failure (never errors) +// - Logs warnings for unexpected structures so we notice breakage +func scrapeDeals(html string) map[string]Deal { + deals := make(map[string]Deal) + + // Strategy: parse the HTML block-by-block. + // Each deal block is between DEAL and the next or end of deals section. + // We extract the inner HTML of each ... deal element. + dealBlockRe := regexp.MustCompile( + `(?s)DEAL(.*?)`, + ) + + dealBlocks := dealBlockRe.FindAllStringSubmatch(html, -1) + log.Printf("[pricing] found %d deal blocks in HTML", len(dealBlocks)) + if len(dealBlocks) == 0 { + log.Printf("[pricing] WARNING: no deal blocks found — HTML structure may have changed") + return deals + } + + // Filter out bogus matches (e.g., footer/nav text accidentally matched). + // Real deal blocks are short (< 2000 chars) and contain at least one tag. + var realBlocks [][]string + for _, b := range dealBlocks { + if len(b[1]) > 2000 { + continue // too long — likely a nav menu or footer + } + if strings.Contains(b[1], ", len<2000)", len(realBlocks)) + dealBlocks = realBlocks + if len(dealBlocks) == 0 { + return deals + } + + codeRe := regexp.MustCompile(`]*>([^<]+)`) + statusRe := regexp.MustCompile(`(permanent|through\s+\w+\s+\d+,\s+\d{4})`) + + for _, block := range dealBlocks { + if len(block) < 2 { + continue + } + inner := block[1] + + // Find all model codes in this deal + codes := codeRe.FindAllStringSubmatch(inner, -1) + if len(codes) == 0 { + continue + } + + // Find status + statusMatch := statusRe.FindStringSubmatch(inner) + status := "unknown" + if len(statusMatch) >= 2 { + status = statusMatch[1] + } + + // Description: text after the LAST in the deal, before status marker + lastCodeEnd := strings.LastIndex(inner, "") + if lastCodeEnd < 0 { + continue + } + descStart := lastCodeEnd + len("") + + // Find status in remaining text + descText := inner[descStart:] + statusIdx := strings.Index(descText, status) + if statusIdx < 0 { + // Status might be in nested span; take up to next + spanEnd := strings.Index(descText, "") + if spanEnd < 0 { + spanEnd = len(descText) + } + statusIdx = spanEnd + } + desc := descText[:statusIdx] + desc = stripHTMLTags(desc) + desc = strings.TrimSpace(desc) + + mult := extractMultiplier(desc) + + // Apply to all models in this deal + for _, c := range codes { + if len(c) < 2 { + continue + } + model := strings.TrimSpace(c[1]) + deal := Deal{ + Model: model, + Multiplier: mult, + Description: desc, + Status: status, + } + deals[strings.ToLower(model)] = deal + } + } + + if len(deals) == 0 { + log.Printf("[pricing] WARNING: no deals parsed from %d blocks — extraction failed", len(dealBlocks)) + } + + return deals +} + +// stripHTMLTags removes HTML tags from a string (best-effort, no full parser). +func stripHTMLTags(s string) string { + tag := regexp.MustCompile(`<[^>]+>`) + s = tag.ReplaceAllString(s, "") + // Collapse whitespace + ws := regexp.MustCompile(`\s+`) + s = ws.ReplaceAllString(s, " ") + return s +} + +// extractMultiplier pulls the multiplier string (e.g., "4×", "2x", "99% off") from a description. +func extractMultiplier(desc string) string { + // Try × (Unicode), x, or % + patterns := []struct { + re *regexp.Regexp + tmpl func([]string) string + }{ + {regexp.MustCompile(`(\d+(?:\.\d+)?)\s*[x×]`), func(m []string) string { return m[1] + "x" }}, + {regexp.MustCompile(`(\d+)%\s*off`), func(m []string) string { return m[1] + "% off" }}, + } + for _, p := range patterns { + if m := p.re.FindStringSubmatch(desc); m != nil { + return p.tmpl(m) + } + } + return "" +} diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index cecd881..a7ebc5f 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -60,19 +60,34 @@ func normalizeFinishReason(reason string) string { // Proxy struct type Proxy struct { - APIKey string - BaseURL string - Client *http.Client - Debug bool + APIKey string + BaseURL string + Client *http.Client + Debug bool + Models *ModelCache + Pricing *PricingCache + ModelInfo *ModelInfoCache } // NewProxy creates a new proxy instance func NewProxy(apiKey string) *Proxy { - return &Proxy{ - APIKey: apiKey, - BaseURL: defaultBaseURL, - Client: &http.Client{Timeout: defaultTimeout}, + pricing := NewPricingCache() + modelInfo := NewModelInfoCache() + p := &Proxy{ + APIKey: apiKey, + BaseURL: defaultBaseURL, + Client: &http.Client{Timeout: defaultTimeout}, + Models: NewModelCache(pricing, modelInfo), + Pricing: pricing, + ModelInfo: modelInfo, + } + // Kick off background refreshes — don't block startup + pricing.StartBackgroundRefresh() + modelInfo.StartBackgroundRefresh() + if apiKey != "" { + p.Models.StartBackgroundRefresh(apiKey) } + return p } // BuildRequest builds the CommandCode request body @@ -95,6 +110,12 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody tools := ConvertTools(openAIReq.Tools) + // Map reasoning effort + var reasoning string + if openAIReq.Reasoning != nil { + reasoning = string(*openAIReq.Reasoning) + } + ccBody := api.CCRequestBody{ Config: api.CCConfig{ WorkingDir: ".", @@ -118,6 +139,7 @@ func (p *Proxy) BuildRequest(openAIReq api.OpenAIChatRequest) (api.CCRequestBody MaxTokens: maxTokens, Temperature: temperature, Stream: true, + Reasoning: reasoning, }, ThreadID: uuid.New().String(), } @@ -668,30 +690,79 @@ func responseItemsToMessages(items []any) []api.OpenAIMessage { // HandleModels handles the /v1/models endpoint func (p *Proxy) HandleModels(w http.ResponseWriter, r *http.Request) { + // Use the dynamic cache; fall back to static list if cache is empty/unavailable. + cached := p.Models.Get() + + // If cache returned the static fallback (empty cache triggers fallback in Get), + // still try to refresh in the background so the next request gets fresh data. + if p.Models.IsStale() && !p.Models.fetchingNow { + go func() { + if err := p.Models.Refresh(p.APIKey); err != nil { + log.Printf("[models] background refresh failed: %v", err) + } + }() + } + models := api.OpenAIModelList{ Object: "list", - Data: []api.OpenAIModel{ - // MoonshotAI - {ID: "moonshotai/Kimi-K2.6", Object: "model", Created: 0, OwnedBy: "moonshotai"}, - {ID: "moonshotai/Kimi-K2.5", Object: "model", Created: 0, OwnedBy: "moonshotai"}, - // ZhipuAI - {ID: "zai-org/GLM-5.1", Object: "model", Created: 0, OwnedBy: "zhipuai"}, - {ID: "zai-org/GLM-5", Object: "model", Created: 0, OwnedBy: "zhipuai"}, - // MiniMaxAI - {ID: "MiniMaxAI/MiniMax-M2.7", Object: "model", Created: 0, OwnedBy: "minimaxai"}, - {ID: "MiniMaxAI/MiniMax-M2.5", Object: "model", Created: 0, OwnedBy: "minimaxai"}, - // DeepSeek - {ID: "deepseek/deepseek-v4-pro", Object: "model", Created: 0, OwnedBy: "deepseek"}, - {ID: "deepseek/deepseek-v4-flash", Object: "model", Created: 0, OwnedBy: "deepseek"}, - // Qwen - {ID: "Qwen/Qwen3.6-Max-Preview", Object: "model", Created: 0, OwnedBy: "qwen"}, - {ID: "Qwen/Qwen3.6-Plus", Object: "model", Created: 0, OwnedBy: "qwen"}, - // StepFun - {ID: "stepfun/Step-3.5-Flash", Object: "model", Created: 0, OwnedBy: "stepfun"}, - // Google - {ID: "google/gemini-3.1-flash-lite", Object: "model", Created: 0, OwnedBy: "google"}, - }, + Data: cached, } w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(models) } + +// getStaticModels returns the hardcoded fallback list of models. Used when +// the dynamic cache is empty and we haven't been able to fetch from upstream. +// Context lengths are looked up from contextmap.go so we have one source of truth. +func getStaticModels() []api.OpenAIModel { + ids := []string{ + // MoonshotAI + "moonshotai/Kimi-K2.7-Code", "moonshotai/Kimi-K2.7-Code-Highspeed", + // ZhipuAI + "zai-org/GLM-5.2", "zai-org/GLM-5.1", "zai-org/GLM-5", + // MiniMaxAI + "MiniMaxAI/MiniMax-M3", + "MiniMaxAI/MiniMax-M2.7", "MiniMaxAI/MiniMax-M2.5", + // DeepSeek + "deepseek/deepseek-v4-pro", "deepseek/deepseek-v4-flash", + // Qwen + "Qwen/Qwen3.6-Max-Preview", "Qwen/Qwen3.6-Plus", + "Qwen/Qwen3.7-Max", "Qwen/Qwen3.7-Plus", + // StepFun + "stepfun/Step-3.7-Flash", "stepfun/Step-3.5-Flash", + // Xiaomi + "xiaomi/mimo-v2.5-pro", "xiaomi/mimo-v2.5", + // NVIDIA + "nvidia/nemotron-3-ultra-550b-a55b", + // Anthropic + "claude-sonnet-4-6", "claude-fable-5", + "claude-opus-4-8", "claude-opus-4-7", "claude-opus-4-6", + "claude-haiku-4-5", "claude-haiku-4-5-20251001", + // OpenAI + "gpt-5.5", "gpt-5.4", "gpt-5.3-codex", "gpt-5.4-mini", + // Google + "google/gemini-3.5-flash", "google/gemini-3.1-flash-lite", + } + out := make([]api.OpenAIModel, 0, len(ids)) + for _, id := range ids { + m := api.OpenAIModel{ + ID: id, + Object: "model", + Created: 0, + OwnedBy: inferOwner(id), + ContextLength: ContextLengthFor(id), + } + out = append(out, m) + } + return out +} + +// inferOwner extracts the provider name from a model ID like "anthropic/claude-...". +func inferOwner(id string) string { + for i, c := range id { + if c == '/' { + return id[:i] + } + } + return "unknown" +}