From bf9580a83d67a7f4c349b226b921203574523f77 Mon Sep 17 00:00:00 2001 From: Mohammad Ausaf Date: Thu, 2 Jul 2026 12:05:09 +0530 Subject: [PATCH 1/2] fix: harden impact LLM JSON parsing --- plugins/cfg_impact/cfg_impact/overview.py | 19 ++++++++++++-- .../cfg_impact/cfg_impact/providers/base.py | 8 +++--- tests/test_impact_boundary.py | 26 +++++++++++++++++++ 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/plugins/cfg_impact/cfg_impact/overview.py b/plugins/cfg_impact/cfg_impact/overview.py index 1747726..4382cbd 100644 --- a/plugins/cfg_impact/cfg_impact/overview.py +++ b/plugins/cfg_impact/cfg_impact/overview.py @@ -4,6 +4,7 @@ import asyncio import json +import re import sys from typing import Any @@ -106,7 +107,7 @@ async def overview_with_optional_llm( payload, json_dumps=lambda payload: json.dumps(payload, indent=2, sort_keys=True), temperature=0.1, - max_tokens=1100, + max_tokens=4096, ) parsed = _parse_jsonish(result.get("content", "")) overview_data["llm"] = { @@ -431,5 +432,19 @@ def _parse_jsonish(text: str) -> dict[str, Any] | None: try: data = json.loads(raw) except json.JSONDecodeError: - return None + return _parse_partial_json_fields(raw) return data if isinstance(data, dict) else None + + +def _parse_partial_json_fields(raw: str) -> dict[str, Any] | None: + parsed: dict[str, Any] = {} + for key in ("summary", "behavior_change", "blast_radius", "risk_level", "rollback_note"): + match = re.search(rf'"{key}"\s*:\s*"((?:\\.|[^"\\])*)"', raw, flags=re.DOTALL) + if match: + try: + parsed[key] = json.loads(f'"{match.group(1)}"') + except json.JSONDecodeError: + parsed[key] = match.group(1) + if '"unknowns"' in raw: + parsed.setdefault("unknowns", []) + return parsed or None diff --git a/plugins/cfg_impact/cfg_impact/providers/base.py b/plugins/cfg_impact/cfg_impact/providers/base.py index 338db20..fcf0023 100644 --- a/plugins/cfg_impact/cfg_impact/providers/base.py +++ b/plugins/cfg_impact/cfg_impact/providers/base.py @@ -49,10 +49,10 @@ async def narrate(self, payload: dict[str, Any], **kwargs: Any) -> dict[str, Any "upstream/downstream roles, overlapping responsibilities), naming the specific " "config_ids. Do NOT say 'impact unknown' when the diff is present, read it. Only list a " "genuine unknown if it truly cannot be inferred from the provided data.\n" - "Return concise JSON with keys: summary (one concrete sentence), behavior_change (what " - "the agent will now do differently, specifically), blast_radius (which named configs or " - "consumers are affected and why), risk_level (low|medium|high), rollback_note, unknowns " - "(array of only real unknowns)." + "Return terse JSON with keys: summary (one concrete sentence), behavior_change (one " + "short sentence), blast_radius (one short sentence naming affected configs), risk_level " + "(low|medium|high), rollback_note (one short sentence), unknowns (array of only real " + "unknowns). Return raw JSON only; do not wrap it in Markdown or code fences." ), }, {"role": "user", "content": json_dumps(payload)}, diff --git a/tests/test_impact_boundary.py b/tests/test_impact_boundary.py index 8f1ee1a..c663686 100644 --- a/tests/test_impact_boundary.py +++ b/tests/test_impact_boundary.py @@ -132,6 +132,32 @@ def test_parse_jsonish_strips_fenced_json() -> None: assert _parse_jsonish('```json\n{"summary":"ok"}\n```') == {"summary": "ok"} +def test_parse_jsonish_recovers_partial_json_fields() -> None: + sys.path.insert(0, str(ROOT / "plugins" / "cfg_impact")) + try: + from cfg_impact.overview import _parse_jsonish + finally: + sys.path.pop(0) + + broken = ( + '{"summary":"Refund agent now expands courtesy credits.",' + '"behavior_change":"Approves larger refunds.",' + '"blast_radius":"Affects refund_resolution and support_orchestrator.",' + '"risk_level":"high",' + '"rollback_note":"Restore the previous refund policy.",' + '"unknowns":["truncated"' + ) + + assert _parse_jsonish(broken) == { + "summary": "Refund agent now expands courtesy credits.", + "behavior_change": "Approves larger refunds.", + "blast_radius": "Affects refund_resolution and support_orchestrator.", + "risk_level": "high", + "rollback_note": "Restore the previous refund policy.", + "unknowns": [], + } + + def test_changed_string_values_reads_before_after_keys() -> None: sys.path.insert(0, str(ROOT / "plugins" / "cfg_impact")) try: From db520020d1e05b0a31c36a24ee563e1e50cec2c3 Mon Sep 17 00:00:00 2001 From: Mohammad Ausaf Date: Thu, 2 Jul 2026 12:14:35 +0530 Subject: [PATCH 2/2] docs: update PyPI install path and scrub private notes --- .gitignore | 9 + README.md | 35 +- docs/ADAPTERS.md | 4 +- docs/AGENTS.md | 4 +- docs/README.md | 8 - docs/SPEC_CORE.md | 2 +- docs/archive/spec-v0.1.md | 563 -------------------------- docs/project-notes/findings.md | 128 ------ docs/project-notes/handoff.json | 107 ----- docs/project-notes/handoff.md | 178 -------- docs/project-notes/review-findings.md | 153 ------- plugins/cfg_impact/README.md | 19 + 12 files changed, 56 insertions(+), 1154 deletions(-) delete mode 100644 docs/archive/spec-v0.1.md delete mode 100644 docs/project-notes/findings.md delete mode 100644 docs/project-notes/handoff.json delete mode 100644 docs/project-notes/handoff.md delete mode 100644 docs/project-notes/review-findings.md diff --git a/.gitignore b/.gitignore index eb2a321..f86466f 100644 --- a/.gitignore +++ b/.gitignore @@ -116,6 +116,15 @@ activemq-data/ .cfg/ /.cfg.toml +# Local project notes and recording drafts +docs/project-notes/ +*.mp4 +*.mov +*.webm +*.mkv +*.avi +*.m4v + # Local secrets and overrides *.local .streamlit/secrets.toml diff --git a/README.md b/README.md index 2da6b20..0b6bc0e 100644 --- a/README.md +++ b/README.md @@ -103,25 +103,39 @@ system, or schema migration tool. ## Install -From a checkout: +From PyPI: + +- [`cfgit`](https://pypi.org/project/cfgit/) +- [`cfgit-impact`](https://pypi.org/project/cfgit-impact/) for optional impact narration ```bash -python -m venv .venv -. .venv/bin/activate -pip install -e '.[mongo,postgres,mcp,dev]' -pip install -e plugins/cfg_impact +pip install 'cfgit[mongo,postgres,mcp]' ``` Minimal install for Mongo only: ```bash -pip install -e '.[mongo]' +pip install 'cfgit[mongo]' ``` Minimal install for Postgres only: ```bash -pip install -e '.[postgres]' +pip install 'cfgit[postgres]' +``` + +Optional impact plugin: + +```bash +pip install cfgit-impact +``` + +If you use `pipx`, install cfgit first and inject the optional plugin into the +same isolated environment: + +```bash +pipx install 'cfgit[mongo,postgres,mcp]' +pipx inject cfgit cfgit-impact ``` ## Quick start @@ -453,7 +467,7 @@ explains in plain language what the change does, what it ripples into, and how t roll it back: ```bash -pip install -e plugins/cfg_impact +pip install cfgit-impact cfg impact agent_configs:agent_planner --llm --json ``` @@ -526,10 +540,7 @@ against a safe environment. - [docs/AGENTS.md](docs/AGENTS.md): MCP, skill, and impact plugin usage - [docs/SPEC_CORE.md](docs/SPEC_CORE.md): project framing and v1 scope - [docs/SPEC.md](docs/SPEC.md): deeper engine reference -- [docs/README.md](docs/README.md): full documentation index, including archived project notes and historical specs -- [docs/project-notes/findings.md](docs/project-notes/findings.md): implementation findings from the origin build -- [docs/project-notes/handoff.md](docs/project-notes/handoff.md): archived handoff notes -- [docs/archive/spec-v0.1.md](docs/archive/spec-v0.1.md): original historical spec +- [docs/README.md](docs/README.md): full documentation index ## Development diff --git a/docs/ADAPTERS.md b/docs/ADAPTERS.md index c9f5a0b..4390dc2 100644 --- a/docs/ADAPTERS.md +++ b/docs/ADAPTERS.md @@ -8,7 +8,7 @@ details. Core owns hashing, history semantics, drift detection, and restore logi Install: ```bash -pip install -e '.[mongo]' +pip install 'cfgit[mongo]' ``` Configuration: @@ -45,7 +45,7 @@ Mongo notes: Install: ```bash -pip install -e '.[postgres]' +pip install 'cfgit[postgres]' ``` Configuration: diff --git a/docs/AGENTS.md b/docs/AGENTS.md index ac0f57f..636758b 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -54,7 +54,7 @@ tokens through MCP unless that client/session is trusted for secrets. Install the MCP extra: ```bash -pip install -e '.[mcp]' +pip install 'cfgit[mcp]' ``` Run: @@ -120,7 +120,7 @@ as token/DB-principal based rather than author-string based. Install: ```bash -pip install -e plugins/cfg_impact +pip install cfgit-impact ``` Deterministic impact: diff --git a/docs/README.md b/docs/README.md index ebc4e18..efca3a2 100644 --- a/docs/README.md +++ b/docs/README.md @@ -12,11 +12,3 @@ Technical references: - [Core spec](SPEC_CORE.md): framing and v1 scope. - [Engine spec](SPEC.md): detailed engine and adapter reference. - -Project notes and historical artifacts: - -- [Implementation findings](project-notes/findings.md): source-code and data-shape findings from the origin implementation. -- [Review findings](project-notes/review-findings.md): implementation audit defects and fix order. -- [Session handoff](project-notes/handoff.md): archived implementation handoff. -- [Session handoff JSON](project-notes/handoff.json): machine-readable handoff archive. -- [Original v0.1 spec](archive/spec-v0.1.md): early historical spec. diff --git a/docs/SPEC_CORE.md b/docs/SPEC_CORE.md index 521456e..d710bfc 100644 --- a/docs/SPEC_CORE.md +++ b/docs/SPEC_CORE.md @@ -189,7 +189,7 @@ writers = ["alice@*", "bob@*", "carol@*"] admin_actions = ["init", "restore_system"] ``` -> **First-task gate (archived in `docs/project-notes/handoff.md`):** before building, pin each collection's real id + live-rule against the actual data. `modelgarden_models` already has duplicate `model_id` values in local dev data, while `model_path` is unique and runtime-authoritative in the code paths checked. Use `model_path` unless production data proves otherwise. Getting this wrong makes the tool wrong on day one. +> **First-task gate for any real deployment:** before tracking a collection, pin the stable id field and live-record rule against actual data. If multiple rows or documents can share the apparent id, configure a different id field or an explicit `live_when` selector before importing history. Getting this wrong makes the tool wrong on day one. --- diff --git a/docs/archive/spec-v0.1.md b/docs/archive/spec-v0.1.md deleted file mode 100644 index ec48427..0000000 --- a/docs/archive/spec-v0.1.md +++ /dev/null @@ -1,563 +0,0 @@ -# `cfg`: Config Version Control: Full Specification - -Version 0.1 (build-ready draft). A git-shaped version-control tool for **database-resident config documents** (LLM agent configs first; any JSON-shaped config doc generally). Storage-agnostic core; Mongo is the first backend. - ---- - -## 0. Glossary (precise terms used throughout) - -- **Config doc**: one versioned unit: a JSON object identified by a `config_id` (e.g. `agent_planner`). Lives in the **runtime store** as its *current* value. -- **Runtime store**: the live DB the application reads at runtime (e.g. Mongo `agent_configs`). Holds **only current** docs. Knows nothing about versioning. Untouched by `cfg` except via `put_config`. -- **History store**: the tool-owned record of every version ever (e.g. Mongo `config_history`). The source of truth for versioning. The application never reads it. -- **Version / entry**: one immutable record in the history store representing the state of one config doc at one moment. -- **hash**: content hash of a config doc; the stable id of a version (git-blob analog). -- **seq**: monotonically increasing integer per `config_id` (human-friendly version number; `planner@7`). -- **HEAD(config_id)**: the latest committed version of a config (highest `seq`). Analogous to git HEAD. -- **live(config_id)**: what the *runtime store* currently holds for a config. May differ from HEAD if someone bypassed the tool (= **dirty**). -- **as-of T**: the reconstructed system (or single-config) state at timestamp T, derived by query, not stored. -- **Adapter**: a class implementing `StorageAdapter` for one DB technology. The only DB-specific code. - ---- - -## 1. Architecture: three layers, hard boundaries - -``` -┌──────────────────────────────────────────────────────────────┐ -│ INTERFACES (3 doors, one engine) │ -│ • Porcelain CLI `cfg ` (humans) │ -│ • Plumbing/JSON `cfg --json` (scripts/agents) │ -│ • Agent surface Claude skill + MCP server (Claude/Codex) │ -├──────────────────────────────────────────────────────────────┤ -│ CORE ENGINE (pure logic, NO db imports) │ -│ commit · log · diff · show · status · restore · tag · etc. │ -│ hashing · as-of reconstruction · dirty detection │ -│ depends ONLY on the StorageAdapter interface │ -├──────────────────────────────────────────────────────────────┤ -│ STORAGE ADAPTER (interface) │ -│ MongoAdapter (now) · PostgresAdapter (later) · … │ -└──────────────────────────────────────────────────────────────┘ -``` - -**Inviolable rules:** -1. Core imports no DB driver. It imports only `StorageAdapter`. (Enforce in CI: grep the core package for `pymongo|psycopg|sqlalchemy` → fail.) -2. All three interfaces call the **same** core functions. The CLI is a thin arg-parser; the MCP server is a thin RPC wrapper. No business logic in any interface. -3. The history schema (Section 3) is DB-neutral. Every adapter stores the same logical fields. - ---- - -## 2. The StorageAdapter interface (the DB seam) - -Language-neutral contract. Mongo first; Postgres/any later by writing one class. **No core change** to add a backend. - -```python -class StorageAdapter(Protocol): - # ── runtime store (current docs the app reads) ── - def get_config(self, config_id: str) -> dict | None: ... - # current runtime doc, or None if absent. - def put_config(self, config_id: str, doc: dict) -> None: ... - # overwrite the runtime doc. The "apply". - - def list_config_ids(self) -> list[str]: ... - # all config_ids present in the runtime store (for `status`, `--as-of` over all). - - # ── history store (versions) ── - def append_history(self, entry: dict) -> None: ... - # insert ONE immutable history entry (schema §3). Never updates an existing entry - # EXCEPT the git_sha backfill (see set_git_sha). - def query_history( - self, *, - config_id: str | None = None, # None = across all configs - as_of: datetime | None = None, # latest entry per config with ts <= as_of - tag: str | None = None, # entries carrying this tag - ref: str | None = None, # a specific version: "planner@7" or a hash - limit: int | None = None, - order: str = "desc", # by (config_id, seq) - ) -> list[dict]: ... - def get_head(self, config_id: str) -> dict | None: ... - # the highest-seq history entry for a config (HEAD). None if never committed. - def next_seq(self, config_id: str) -> int: ... - # atomic; returns get_head().seq + 1 (or 1). - - # ── linkage + labels ── - def set_git_sha(self, version_hash: str, git_sha: str) -> bool: ... - # backfill git_sha onto an existing entry (post-commit hook). Idempotent. True if set. - def add_tag(self, version_hash: str, tag: str) -> None: ... - def remove_tag(self, version_hash: str, tag: str) -> None: ... - def list_tags(self) -> list[dict]: ... # [{tag, config_id, hash, ts}] - - # ── atomicity ── - def commit_apply(self, *, history_entry: dict, new_doc: dict, config_id: str) -> None: ... - # ATOMIC where the backend supports it: append_history(history_entry) AND - # put_config(config_id, new_doc) succeed together or neither does. - # Mongo: multi-doc txn (replica set) or best-effort+compensation if standalone. - # Postgres: single SQL txn. - - # ── meta ── - def ensure_schema(self) -> None: ... # create collections/tables + indexes (idempotent). - def backend_name(self) -> str: ... # "mongo" | "postgres" | ... -``` - -**Index requirements every adapter MUST create (`ensure_schema`):** -- history: unique `(config_id, seq)`; index `(config_id, ts)`; index `hash` (unique); index `tags` (multikey/GIN); index `git_sha`. -- runtime: whatever the app already has (adapter does not impose). - ---- - -## 3. History entry schema (DB-neutral, exact) - -One entry = the state of ONE config at ONE moment. **Immutable** after write (except `git_sha` backfill + `tags` mutation). - -```jsonc -{ - "_id": "", // adapter-managed - "config_id": "agent_planner", // REQUIRED. which config. - "seq": 7, // REQUIRED. monotonic per config_id, starts at 1. - "hash": "sha256:16hex", // REQUIRED. canonical content hash of `doc` (§4). UNIQUE. - "parent_hash": "sha256:16hex | null", // hash of the prior version (seq-1). null for seq 1. - "doc": { /* full config doc */ },// REQUIRED. FULL snapshot. exactly what put_config writes. - "message": "bumped planner multi-turn", // REQUIRED (non-empty). the "why". - "author": "developer", // REQUIRED. resolved identity (§9). - "ts": "2026-06-21T10:30:00Z", // REQUIRED. UTC ISO-8601. server-trusted, not client. - "op": "commit", // REQUIRED. enum: commit | restore | import | revert - "git_sha": "def456… | null", // code commit live at this moment. backfilled by hook. - "tags": ["june7-good"], // human labels. mutable. - "meta": { // optional, open. - "restored_from": "planner@5", // set when op=restore: which version this re-applied - "tool_version": "cfg/0.1.0", - "hostname": "dev-laptop" - } -} -``` - -**Field rules:** -- `op=commit` → a normal new version (user edited, applied). -- `op=restore` → re-applying an OLD doc as a NEW HEAD version (non-destructive rollback). `doc` = the restored doc; `meta.restored_from` set; `seq` is still the next seq (history only moves forward: like `git revert`, never rewrites). -- `op=import` → first-time ingestion of an already-live doc that predates the tool (the migration; §11). `parent_hash=null`, `message="import baseline"`. -- `seq` is **per config_id**, gapless, assigned via `next_seq` atomically. -- **No diffs stored.** Full docs. (Justification: ~43 configs × few edits/week = trivial volume; restore = copy; zero replay risk. Packing/compaction is a future optimization, never a correctness concern.) - ---- - -## 4. Canonical content hashing (exact algorithm) - -`hash(doc)` must be **stable, order-independent, and exclude runtime-owned noise** so that "same meaningful content ⇒ same hash" (idempotent commits, honest dirty-detection). - -Algorithm: -1. **Strip ignored keys** from a deep copy of `doc` before hashing. Ignored = runtime-managed/volatile fields that the tool does not own and that the app mutates out-of-band. Configurable per project (Section 8 `ignore_fields`). Defaults for our agent_configs: `_id`, `metrics`, `updated_at`, `updated_by`, `created_at`, `created_by`, and any key matching `instructions_backup_*` (the legacy cruft: never versioned, never hashed). -2. **Canonical-JSON serialize** the stripped doc: keys sorted recursively, UTF-8, no insignificant whitespace, numbers in a fixed normal form (ints as ints; floats via shortest round-trip repr; reject NaN/Inf). Arrays preserve order (order is semantic for `tools`, etc.). -3. `sha256` the canonical bytes; take a stable prefix. Store as `"sha256:"`. (16 hex = 64 bits; collision-safe at this scale; full digest retained internally if ever needed.) - -**Why strip before hash:** otherwise `metrics.total_invocations` ticking up would make every config look "dirty" forever and every commit produce a new hash for no real change. Stripping makes hash track *meaningful* content only. - -`live == HEAD` test (dirty detection) = `hash(strip(live_doc)) == HEAD.hash`. - ---- - -## 5. The command surface (porcelain): every command, exhaustively - -Global form: `cfg [global-flags] [args] [flags]` - -**Global flags (apply to all verbs):** -- `--env `: which environment/profile (dev|prod|preview|…); selects the adapter + connection (Section 8). Default: `CFG_ENV` env var, else `dev`. -- `--json`: emit machine-readable JSON to stdout instead of human text. (Plumbing mode.) -- `--quiet` / `-q`: suppress non-essential human output. -- `--yes` / `-y`: assume "yes" to confirmation prompts (for scripts). -- `--config-file `: path to the `cfg` project config (Section 8). Default: walk up for `.cfg.toml`. - -**Exit codes (uniform, agent-critical):** -- `0` success / clean. -- `1` generic error (bad args, not found). -- `2` **dirty / conflict** (status found drift; commit base is stale). Distinct so agents branch on it. -- `3` connection/storage error. -- `4` confirmation declined. - ---- - -### 5.1 `cfg init` -Initialize `cfg` for a project/env: create history collection/tables + indexes via `adapter.ensure_schema()`, write `.cfg.toml` if absent. -``` -cfg init --env dev -``` -Output: backend name, collections/tables ensured, indexes created. Idempotent. - ---- - -### 5.2 `cfg import` (one-time baseline) -Ingest already-live runtime docs as `seq=1` `op=import` baselines so history starts from reality. -``` -cfg import # import ALL runtime config_ids not yet in history -cfg import agent_planner # just one -cfg import --strip-backups # ALSO remove instructions_backup_* keys from the runtime doc on import (cleanup) -``` -Behavior per config: if no history exists → snapshot current live doc as `seq=1, op=import, parent_hash=null, message="import baseline"`. If history exists → skip (idempotent), warn. -`--strip-backups`: additionally `put_config` a cleaned doc (backup keys removed) AND record that as the baseline. **Confirm required** (mutates runtime). The 15 legacy `instructions_backup_*` keys die here, preserved forever in nothing-needed because the import IS the first version. - ---- - -### 5.3 `cfg edit` (stage a change) -Pull live doc → temp file → `$EDITOR` → on save, stage it (write to a local staging area, NOT the DB). -``` -cfg edit agent_planner # opens the doc; large `instructions` as a readable block -``` -- Opens the **stripped** doc (no `_id`/metrics/backup noise) so the editor view is clean. -- On save: validates JSON; stores staged doc in `.cfg/staged/.json`. Nothing hits the DB yet. -- `--from ` skips the editor and stages the given file. -- `cfg edit --abort ` discards staging. - -(Rationale: separates "prepare a change" from "apply it": like git's working tree vs commit. Lets you review with `cfg diff` before `commit`.) - ---- - -### 5.4 `cfg set` (field-level quick edit) -For tiny changes without opening an editor. -``` -cfg set agent_planner client_config.temperature 0.6 -m "lower temp" -cfg set agent_planner tools+= run_validator -m "grant validator" # array append -cfg set agent_planner tools-= old_tool -m "drop tool" # array remove -cfg set agent_planner some.key --delete -m "remove key" -``` -Dotted path into the doc. Stages + (with `-m`) commits in one step. Without `-m`, only stages (then `cfg commit`). Type inference: `true/false/null/int/float` parsed; quote to force string (`'0.6'`). - ---- - -### 5.5 `cfg commit` (the workhorse) -Apply a staged (or `--from`) new version: snapshot prior → write new → record. -``` -cfg commit agent_planner -m "multi-turn fix" -cfg commit agent_planner --from new.json -m "..." -cfg commit --all -m "batch tweak" # commit every staged config (one entry each) -``` -**Algorithm (exact):** -1. Resolve `new_doc` = staged doc for config (or `--from`). Error if nothing staged. -2. `live = adapter.get_config(id)`; `head = adapter.get_head(id)`. -3. **Stale-base / dirty check (optimistic concurrency):** - - If `head` exists and `hash(strip(live)) != head.hash` → **runtime drifted** (someone bypassed the tool since HEAD). STOP, exit `2`, message: *"runtime for `agent_planner` changed outside cfg (live hash X ≠ HEAD Y). Run `cfg status` / `cfg adopt` first."* Unless `--force` (records the bypass as an `import`-flavored entry first, then proceeds; logs loudly). - - This is the CAS that kills silent overwrite: your commit refuses if the live state isn't the HEAD you based on. -4. `new_hash = hash(strip(new_doc))`. If `new_hash == head.hash` → **no-op**, exit `0`, message "nothing to commit (identical to HEAD)". (Idempotent.) -5. Build entry: `seq=next_seq(id)`, `hash=new_hash`, `parent_hash=head.hash or null`, `doc=new_doc`, `message`, `author`, `ts=server_now()`, `op="commit"`, `git_sha=null`. -6. `adapter.commit_apply(history_entry=entry, new_doc=new_doc, config_id=id)`: atomic append + apply. -7. Clear staging for that config. Print `agent_planner@7 (sha256:a1b2c3d4)`. - -`-m` is REQUIRED (non-empty). No silent commits. `--allow-empty-message` exists only for scripts and is discouraged. - ---- - -### 5.6 `cfg status` (dirty-tree / bypass detector) -For each config: compare `live` vs `HEAD`. -``` -cfg status # all configs -cfg status agent_planner # one -``` -States per config: -- `clean`: `hash(strip(live)) == HEAD.hash`. -- `DIRTY`: live differs from HEAD (bypass happened) → show short diff summary. -- `staged`: there's a staged edit not yet committed. -- `untracked`: live doc exists but no history (never imported). -- `missing`: HEAD exists but runtime doc gone. -Exit `2` if ANY config is DIRTY (so CI/agents catch drift). `--json` → `[{config_id, state, live_hash, head_hash, head_seq, staged}]`. - ---- - -### 5.7 `cfg adopt` (reconcile a bypass) -When `status` shows DIRTY (someone hand-edited runtime), record reality as a new version so history catches up. -``` -cfg adopt agent_planner -m "adopt hand-edit (INC-123 hotfix)" -cfg adopt --all -m "reconcile drift" -``` -Snapshots the current `live` doc as a new HEAD entry (`op="import"`, `meta.adopted=true`). After adopt, `status` = clean. (This is how the tool survives the inevitable raw-Mongo edit honestly: it can't prevent the bypass, but `adopt` folds it into history with attribution instead of losing it.) - ---- - -### 5.8 `cfg log` -History views. -``` -cfg log agent_planner # versions of one config (newest first) -cfg log --all # every version of every config (the reflog/safety net) -cfg log --as-of 2026-06-07 # the SYSTEM view: HEAD-as-of-T for every config (one line each) -cfg log agent_planner --as-of 2026-06-07 # what THIS config was at T -cfg log --tag june7-good -cfg log --git # versions linked to a code commit -``` -Human columns: `seq hash ts author op git_sha? message`. `--json` → array of entries (sans `doc` unless `--with-doc`). `-n ` limit. `--oneline`. - ---- - -### 5.9 `cfg diff` -Compare two versions, or live-vs-HEAD. -``` -cfg diff agent_planner @5 @7 # version 5 vs 7 -cfg diff agent_planner a1b2c3 9f8e7d # by hash -cfg diff agent_planner # live vs HEAD (your uncommitted/bypass delta) -cfg diff agent_planner @5 live # version 5 vs current runtime -cfg diff --as-of 2026-06-07 --to now # SYSTEM diff: every config that changed between June7 and now -``` -Output: structured field-level diff (not raw text blob). For the big `instructions` string, default to a **line/section diff** (git-style unified) so it's reviewable; `--semantic` invokes the optional LLM diff (Section 7). `--json` → `{config_id, changes:[{path, op:add|del|mod, before, after}]}`. `--stat` = summary counts only. - ---- - -### 5.10 `cfg show` -Full doc at a version. -``` -cfg show agent_planner @7 -cfg show agent_planner @7 --field instructions # just one field -cfg show agent_planner live -``` - ---- - -### 5.11 `cfg restore` (single + emergent system) -Re-apply an old state as a new HEAD (non-destructive). -``` -# single config -cfg restore agent_planner @5 # restore one config to version 5 -cfg restore agent_planner a1b2c3 -cfg restore agent_planner --as-of 2026-06-07 # this config's state at T - -# SYSTEM-WIDE (emergent: loops all configs) -cfg restore --as-of 2026-06-07 # ALL configs → their June-7 HEAD -cfg restore --tag june7-good # ALL configs → the tagged moment -``` -**Modifiers:** -- `--preview`: do NOT touch the target runtime; apply into the **preview env** instead (Section 6). Prints preview details/URL. (requested preview workflow) -- `--dry-run`: compute and print exactly what would change (per config: HEADnow→target), change nothing. Exit `0`. -- `--only a,b,c` / `--except x`: scope a system restore to a subset. -- `--include-code`: also resolve the git SHA for the chosen moment and check out / trigger a code deploy at that SHA (Section 5.14 linkage). Off by default (config-only restore is the common case). - -**Algorithm: `restore --as-of T` (the emergent loop), exact:** -1. `ids = adapter.list_config_ids()` (∪ any config that has history but no live doc, optionally). -2. For each `id`: `target = query_history(config_id=id, as_of=T, limit=1, order=desc)` → the latest entry with `ts ≤ T`. If none (config didn't exist at T) → skip (and report "did not exist at T"). -3. Build the plan: `[(id, head_now.seq → target.seq, hash delta)]`. If `--dry-run`, print and stop. -4. **Pre-flight dirty check:** if any target config is currently DIRTY vs its HEAD, warn (its live state will be overwritten by the restore; that's usually intended, but surface it). `--force` to proceed without prompt. -5. For each `id` whose `target.hash != head_now.hash`: write `target.doc` to runtime via `commit_apply` with a NEW entry `op="restore"`, `seq=next_seq`, `parent_hash=head_now.hash`, `doc=target.doc`, `meta.restored_from="@"`, `message="restore as-of "` (or user `-m`). -6. Configs already equal to target → skip (no-op, reported). -7. Print summary: N restored, M unchanged, K did-not-exist. - -History only moves forward: a restore is a new commit, fully revertible (you can `cfg restore --as-of ` to undo). Nothing is destroyed. - ---- - -### 5.12 `cfg tag` / `cfg tags` -Human labels for a moment. -``` -cfg tag june7-good --as-of 2026-06-07 # tag the HEAD-as-of-T of EVERY config with this label -cfg tag planner-good agent_planner @7 # tag a single version -cfg tags # list -cfg tag --delete june7-good -``` -A system tag (`--as-of`) attaches the same tag string to the resolved version of each config, so `cfg restore --tag june7-good` rebuilds that exact moment. (Tags decouple humans from juggling timestamps.) - ---- - -### 5.13 `cfg points` (find a moment) -List change-moments to help pick `--as-of`/tag targets. -``` -cfg points --around 2026-06-07 [--window 3d] # entries near a date, all configs -cfg points agent_planner # change-moments of one config -``` -Shows clustered timeline: `ts · config_id · seq · git_sha? · message`. Lets a human eyeball "the June-7 deploy" and grab its timestamp/tag. - ---- - -### 5.14 Git linkage (code ⇄ config) -For changes that ship WITH code. No custom git flag (git won't allow it). Use a **commit trailer + post-commit hook**. - -Author flow: -``` -cfg commit agent_planner -m "multi-turn fix" # → agent_planner@7 (a1b2c3) -git add agent.py -git commit -m "planner multi-turn - -Cfg-Version: agent_planner@7" # trailer; can list multiple -``` -Hook (`.git/hooks/post-commit`, installed by `cfg hooks install`): -1. Read the new commit's message; `git interpret-trailers --parse` to extract all `Cfg-Version:` values. -2. For each `id@seq` (or hash): resolve to a history `hash`, call `adapter.set_git_sha(hash, )`. -3. Idempotent; silent on no trailer. - -Effect: `cfg log --git ` shows configs that shipped with that code; `cfg restore --as-of T --include-code` knows which SHA pairs with the config moment. Config-alone commits leave `git_sha=null` (the majority). - -`cfg hooks install` / `cfg hooks uninstall` manage the hook. (Also offer a `prepare-commit-msg` helper that, if a config was committed in the last N minutes, auto-suggests the `Cfg-Version:` trailer.) - ---- - -### 5.15 Misc -- `cfg whoami`: show resolved author identity + env. -- `cfg config`: get/set `.cfg.toml` values. -- `cfg gc`: (future) compact history (e.g. keep full docs but dedup identical blobs by hash). No-op v0.1. -- `cfg version`: tool version. - ---- - -## 6. Preview environment (`--preview`) - -`--preview` must target something. Define a **preview profile** in `.cfg.toml`: -```toml -[env.preview] -backend = "mongo" -runtime_uri = "mongodb+srv://…/appdb-preview" # a SEPARATE preview db -history_uri = "…" # can share history with dev -deploy = { type = "command", run = "scripts/deploy_preview.sh {git_sha}" } # optional code deploy -``` -`cfg restore --as-of T --preview`: -1. Resolves the same plan as a normal restore but targets the **preview runtime** (`put_config` goes to preview db). -2. If `--include-code` and a `deploy` command is configured, runs it with the resolved `{git_sha}` (e.g. spins a preview container at that SHA). -3. Prints: preview db name, configs applied, git_sha used, and (if the deploy command emits one) a preview URL. -Tiers of support: -- **Minimal (v0.1):** preview = "restore into `*-preview` runtime db; you point your local/staging backend at it." No code deploy orchestration. -- **Full (later):** the `deploy` command does a real ephemeral deploy at the SHA. - ---- - -## 7. Optional LLM semantic layer (off by default) - -Not core. A pluggable hook + a diff mode. -- `cfg diff … --semantic`: sends `{before, after, + the other configs’ summaries}` to an LLM; returns: nature of change, cross-config contradictions, downstream-agent impact, risk level. Human-readable + `--json` structured. -- `commit.pre_hook = "semantic"` in `.cfg.toml`: on `cfg commit`, runs the semantic check and **warns** (never blocks unless `--strict`). Surfaces "this loosens shot-count rule which the validator assumes." -- Implemented as a `cfg-semantic` plugin so the core has zero LLM dependency. Provider configurable (Claude default). - ---- - -## 8. Project config: `.cfg.toml` - -```toml -[project] -name = "example-agent-configs" - -[stores] -# logical mapping of config_id → which collection/table holds the runtime doc -runtime_collection = "agent_configs" -history_collection = "config_history" -id_field = "config_id" # the field that identifies a config in the runtime store - -[hash] -ignore_fields = [ # stripped before hashing/diff (runtime-owned noise) - "_id", "metrics", "updated_at", "updated_by", "created_at", "created_by", -] -ignore_field_patterns = ["instructions_backup_*"] - -[identity] -author_from = "git" # git user.email | env CFG_AUTHOR | os user - -[env.dev] -backend = "mongo" -runtime_uri = "env:MONGODB_URI" # read from env var (never hardcode secrets) -history_uri = "env:MONGODB_URI" -db = "appdb-dev" -gated = false # dev = ungated, instant - -[env.prod] -backend = "mongo" -runtime_uri = "env:PROD_MONGODB_URI" -db = "appdb" -gated = true # prod = requires confirmation / review policy (§10) - -[env.preview] -backend = "mongo" -runtime_uri = "env:PREVIEW_MONGODB_URI" -db = "appdb-preview" -``` - -Secrets always via `env:VAR`, never literals. - ---- - -## 9. Identity / authorship - -`author` resolved at commit time, in order: `--author` flag → `CFG_AUTHOR` env → git `user.email` → OS username. Stored verbatim on the entry. (No auth system; trust-based like git, but always attributed: which already beats today's overwritten `updated_by`.) - ---- - -## 10. Environments + gating policy (dev instant, prod controlled) - -- **dev** (`gated=false`): `commit`/`restore` apply instantly, no confirmation. Matches today's raw-Mongo speed. This is where 95% of edits happen. -- **prod** (`gated=true`): mutating verbs (`commit`, `restore`, `set`, `adopt`) require explicit confirmation (`--yes` to bypass interactively) and honor an optional **policy**: - ```toml - [env.prod.policy] - require_reason = true # -m must reference a ticket/INC for prod - require_clean = true # refuse prod commit if status is DIRTY (force adopt first) - break_glass = true # allow --force with a recorded reason for incidents - ``` -- The *propagation* is still instant in both (no deploy): gating adds a human checkpoint, not latency. Emergency prod edits: `cfg commit … --env prod --force -m "INC-123 break-glass"` records the bypass with reason so the audit trail stays honest. -- **The load-bearing org control (out of tool scope but stated):** to make the history trustworthy, restrict *direct* prod-DB write creds so `cfg` (its service identity) is effectively the only prod writer; devs keep raw access to dev. The tool's `status`/`adopt` are the backstop for the inevitable break-glass. - ---- - -## 11. Migration (cutover from today) - -1. `cfg init --env dev` and `--env prod` (creates `config_history` + indexes; runtime untouched). -2. `cfg import --strip-backups` on dev, review, then prod (with confirm): baselines all 43 configs as `seq=1`, removes the 15 legacy `instructions_backup_*` keys (their content is preserved AS the baseline version). -3. `cfg hooks install` in the backend repo. -4. Replace the old `seed_*.py`/raw-Mongo habit: devs now `cfg edit/set` → `cfg commit`. (The existing `seed_agent_planner_shape.py` "owned-fields" logic informs `ignore_fields`/staging.) -5. Optional: lock down prod write creds (Section 10). -No runtime/app change required: the application keeps reading the same runtime collection; it never knows `cfg` exists. - ---- - -## 12. Agent / AI interface (Claude, Codex, others): first-class, not an afterthought - -Three mechanisms, increasing structure: - -**(a) `--json` on every verb.** Stable, documented JSON schemas (the "plumbing": git's lesson: agents depend on stable output, never the pretty text). Meaningful exit codes (Section 5). An agent can already drive the CLI safely with `--json -y`. - -**(b) MCP server (`cfg-mcp`).** Exposes the core as typed tools so ANY agent runtime calls them as first-class tools (no shell-string fragility): -``` -cfg.status(env?, config_id?) -> [{config_id, state, head_seq, …}] -cfg.log(env?, config_id?, as_of?, tag?, limit?) -> [entry…] -cfg.diff(env?, config_id, a, b, semantic?) -> {changes:[…]} -cfg.show(env?, config_id, ref) -> {doc} -cfg.commit(env, config_id, doc|patch, message, author?) -> {config_id, seq, hash} -cfg.restore(env, {config_id?|as_of?|tag?}, preview?, dry_run?, only?, include_code?) -> {plan|result} -cfg.tag(env, tag, as_of?|ref?) -> {ok} -cfg.points(env, around, window?) -> [moment…] -``` -Each tool: typed args (JSON-schema validated), structured result, idempotency where applicable, and a `dry_run` on every mutating tool so an agent can preview before applying. Mutating tools on `gated` envs return a "needs confirmation" result unless an explicit `confirm:true` is passed (so an agent can't silently mutate prod). - -**(c) Claude skill (`/cfg`).** A SKILL.md that teaches the model the workflow + guardrails, wrapping (b): -- Verbs + when to use them; the dirty-tree rule ("ALWAYS `cfg.status` before `cfg.commit`; if DIRTY, surface and `cfg.adopt` with attribution, don't clobber"). -- Safety contract: "NEVER write the runtime DB directly; only via `cfg.*`. `restore` is non-destructive. On prod, require explicit human confirm." -- Canned flows: "reproduce June-7" → `cfg.points(around)` → `cfg.restore(as_of, preview=true)` → report URL → on human ok `cfg.restore(as_of)`. "version a prompt edit" → stage → `cfg.diff` (optional `semantic`) → `cfg.commit`. -- Output etiquette: always show the resulting `config_id@seq (hash)` and the one-line diff stat so the human sees what landed. - -The same engine serves all three; the skill/MCP are thin. An agent gets *git-for-configs* as typed tools, with previews and confirmation gates baked in so it's safe to let Claude/Codex operate it autonomously. - ---- - -## 13. Edge cases & failure modes (explicit decisions) - -| Case | Decision | -|---|---| -| Commit when runtime drifted from HEAD (bypass) | Refuse, exit 2, tell user to `status`/`adopt`. `--force` records the bypass as a baseline first. | -| Commit identical to HEAD | No-op, exit 0 ("nothing to commit"). | -| Restore a config that didn't exist at T | Skip it, report "did not exist at T"; don't delete the now-existing config (no destructive deletes in v0.1). | -| Config deleted from runtime but has history | `status`=missing. `restore` can re-create it (put_config). | -| Two devs commit same config concurrently | Second `commit_apply` sees a seq/hash collision (unique `(config_id,seq)` + CAS on base hash) → second one fails with "base is stale, re-pull", exit 2. No lost update. | -| `commit_apply` partial failure (history written, runtime not) on a non-txn backend | Adapter must compensate: either retry the runtime write or roll back the history entry; never leave history claiming a state that isn't live. Standalone Mongo: do runtime write first only after history insert succeeds, then if runtime write fails, mark/delete the orphan entry. (Prefer replica-set txn.) | -| Huge `instructions` diff noise from reflow | Default diff is section/line unified; offer `--semantic`. Never block on diff size. | -| Clock skew on `ts` | Server/adapter assigns `ts` (trusted), not the client. | -| Hash collision | 64-bit prefix; on the astronomically unlikely collision, compare full sha256 retained internally. | -| Restore mid-incident with dirty prod | Warn (live will be overwritten); `--force` to skip prompt; the pre-restore live state is still captured (adopt it first or it's recoverable via the just-overwritten config's history if it was committed). | -| Secret in a config doc | Out of scope to encrypt; docs shouldn't hold secrets. `ignore_fields` can exclude a field from history if needed. | - ---- - -## 14. Non-goals (v0.1) - -- Branching/merging of configs (git has it; we don't need it for DB configs: linear history per config + restore covers the real workflow; revisit only if a real branching need appears). -- Diff-based storage / packing (full docs; `gc` is future). -- Built-in auth/RBAC (rely on DB creds + gating policy + org cred lockdown). -- Encrypting docs at rest. -- A GUI (CLI + JSON + MCP + skill cover humans and agents). - ---- - -## 15. Build order (ships value incrementally) - -1. **Core + MongoAdapter + history schema + hashing + `init`/`import`/`status`/`commit`/`log`/`show`/`diff`.** (Kills silent overwrite via CAS; gives history + diff immediately; replaces backup-key habit.) -2. **`restore` single + `--as-of` emergent + `--dry-run` + `--preview` (minimal) + `tag`/`points`.** (Delivers the rollback flow.) -3. **Git linkage (`hooks install`, trailer parsing, `set_git_sha`).** -4. **Agent surface: `--json` everywhere → MCP server → Claude skill.** -5. **Optional LLM semantic diff plugin.** Full preview deploy. `PostgresAdapter` (proves the seam). - -Each stage is independently useful; stop anywhere and you still have a coherent tool. -``` -``` diff --git a/docs/project-notes/findings.md b/docs/project-notes/findings.md deleted file mode 100644 index 98c64a6..0000000 --- a/docs/project-notes/findings.md +++ /dev/null @@ -1,128 +0,0 @@ -# cfgit First-Task Findings - -Date: 2026-06-21 - -## Scope Confirmation - -`docs/SPEC_CORE.md` is the right current framing: cfgit is non-custodial version control for live datastore records. The backend supports that framing. Runtime behavior is controlled by live Mongo records, and both human scripts and admin/runtime code can change those records without a deploy. - -No production reads or writes were performed. Local Docker Mongo was started and inspected read-only at `mongodb://localhost:27017/appdb-dev?directConnection=true`. - -## Backend Runtime Reads - -`agent_configs` is runtime-authoritative through `app/services/agentic_v2/persona_loader.py`. - -The main loader reads: - -`db["agent_configs"].find_one({"config_id": config_id, "is_active": True})` - -There is no cache in that function. It returns the live Mongo doc and then applies only optional in-process dev overrides from tool context. - -Additional runtime readers exist: - -- `load_active_personas` reads active persona docs by `agent_type`, `is_active`, and `participates_in`. -- `deep_research_service` reads `agent_configs` with `config_id` and `is_active`. -- `director.py` reads `director_settings` by `config_id` without `is_active`. -- Many agentic and canvas services call `load_agent_config`, so the same live read path fans out broadly. -- Admin and skills endpoints can update `agent_configs` directly by `config_id`. - -Finding: `agent_configs` is live control-plane data, not just prompt text. - -## Seed Script Pattern - -The seed and patch scripts match the handoff: - -- They load `.env`. -- They use `MONGODB_URI` and `MONGODB_DB_NAME`. -- They read or write `agent_configs` directly. -- They commonly update by `config_id`. -- Several scripts own only selected fields and preserve the rest. -- There are 19 `seed_*.py` plus `backfill_*.py` files in `scripts/`. - -This confirms the dirty-workflow premise: config changes can arrive through many script paths, not just one official editor. - -## Local Mongo State - -Docker was initially stopped. After starting Docker, the local container `tinify-jobrouter-mongo-1` was healthy and running Mongo as replica set `rs0`. - -Local `appdb-dev` contains `modelgarden_models` but does not contain `agent_configs`. - -Because local `agent_configs` is absent, I could not verify the live `agent_planner` document shape from local data. I did not fall back to managed Mongo. The code-level assumptions for `agent_configs` are confirmed; the data-shape confirmation remains open until a local or dev snapshot with `agent_configs` is available. - -## Per-Collection Identity And Live Rules - -### agent_configs - -Recommended v1 identity: - -`id_field = "config_id"` - -Recommended live rule: - -`live_when = { is_active = true }` - -Why: - -- The authoritative runtime loader uses `config_id` plus `is_active: true`. -- Persona fanout uses `is_active: true`. -- Seed scripts sometimes omit `is_active` in update filters, but the runtime live selector is still active-only. - -Open item: - -- `director_settings` is read by `config_id` without `is_active`. cfgit should either require that doc to match the collection live rule or leave it out of the first configured set until local data confirms its shape. - -### modelgarden_models - -Original respec said `id_field = "model_id"`. Real code and local data do not support that for v1. - -Recommended v1 identity: - -`id_field = "model_path"` - -Recommended live rule: - -`live_when = {}` - -Why: - -- Runtime job submission reads models by `model_path`. -- Agentic model tools and executors read by `model_path`. -- Admin config endpoints read and update by `model_path`. -- Local data has 221 docs and `model_path` has no duplicates. -- Local data has duplicate `model_id = "fal-ai-sync-lipsync-v3"` under every obvious live filter checked: none, `enabled`, `active`, `enabled + active`, `jobrouter_enabled`, and `enabled + jobrouter_enabled`. - -Finding: - -`model_id` is not a safe stable id for cfgit v1. It may still be useful as metadata, but the versioned record identity should be `model_path` unless production data proves otherwise. - -## modelgarden_models Duplicate Finding - -Local duplicate: - -`model_id = "fal-ai-sync-lipsync-v3"` - -Both duplicate docs are enabled, active, and jobrouter-enabled. They share the same `model_path`, but local `model_path` is unique across the collection. - -The two docs differ in fields such as `is_shortlisted`, `fallback_models`, `updated_at`, and fallback metadata. This is not a harmless inactive duplicate under the currently checked filters. - -## Spec Mismatches Flagged - -1. `examples/.cfg.toml` should use `model_path` for `modelgarden_models.id_field`, not `model_id`. -2. `docs/SPEC_CORE.md` and `HANDOFF.md` should describe `modelgarden_models` identity as `model_path` based on current code and local data. -3. The local environment note in `HANDOFF.md` is stale: local Mongo was not running at first, but Docker can start the replica-set container successfully. -4. The `agent_configs` data-shape check is incomplete locally because the local database lacks that collection. - -## Conclusion - -The non-custodial framing holds. The v1 should remain narrow and general: - -- Version live records in place. -- Detect out-of-band drift. -- Adopt drift into history. -- Refuse commits that would clobber unadopted drift. -- Restore one record or the configured system to a prior point. - -For the origin collections, the current best v1 config is: - -- `agent_configs` by `config_id`, live when `is_active = true` -- `modelgarden_models` by `model_path`, live when `{}` diff --git a/docs/project-notes/handoff.json b/docs/project-notes/handoff.json deleted file mode 100644 index d6c79cc..0000000 --- a/docs/project-notes/handoff.json +++ /dev/null @@ -1,107 +0,0 @@ -{ - "project": "cfg", - "tagline": "Git-shaped version control for database-resident config documents", - "handoff_purpose": "Brief for a fresh Claude Code session to take over implementation. Read HANDOFF.md first (this is its machine-readable index).", - "status": { - "phase": "spec complete, build NOT started (per user: do not implement yet)", - "spec_version": "0.3.2", - "spec_path": "docs/SPEC.md", - "exit_gate": "GREEN — 4 adversarial rounds, no remaining Tier-0/Tier-1 defects", - "repo": " (standalone git repo, OUTSIDE backend-repo, branch main)" - }, - "first_task_before_building": { - "instruction": "Read the actual backend-repo code to understand the real problem firsthand and confirm/flag the spec vs reality. Write FINDINGS.md. Only then build.", - "read_in_order": [ - {"path": "/app/services/agentic_v2/persona_loader.py", "why": "line ~92 db['agent_configs'].find_one({config_id, is_active:True}) is the runtime-authoritative read (live, per-build, no cache, no deploy). Confirm no cache, is_active selector, any other readers."}, - {"path": "/scripts/seed_agent_planner_shape.py", "why": "de-facto prototype of what cfg replaces: find_one({config_id,is_active:True})->update_one, prompt in sibling .txt, return 2 exit-code collision. Skim 3-4 other seed_*.py."}, - {"path": "/scripts/", "why": "19 seed_*.py + backfill_*.py scripts raw-write agent_configs = the daily workflow and adoption blocker cfg must absorb (apply-doc shim, SPEC §13)."}, - {"path": "/.env", "why": "MONGODB_URI, MONGODB_DB_NAME=appdb-dev. NOTE no PREVIEW/PROD URI yet; spec needs distinct DEV_/PREVIEW_/PROD_MONGODB_URI."}, - {"path": "mongosh against appdb-dev (READ-ONLY, dev not prod)", "why": "inspect real agent_configs doc shape (~30 fields incl instructions ~28k chars, client_config, tools[7], skills, fallback_models, prompt_templates, phase_contract, default_options, plus instructions_backup_ keys ~15 across 5 configs). Validate SPEC §7 edge_fields against reality."} - ] - }, - "problem": { - "summary": "4 backend devs edit runtime-authoritative LLM agent configs in MongoDB agent_configs via raw Mongo writes through Claude Code scripts. DB overrides code; a write changes agent behavior instantly with no deploy.", - "pains": [ - "Silent overwrites: no concurrency control, lost updates", - "No history/rollback: devs hand-paste instructions_backup_ keys (~15 across 5 configs)", - "Invisible drift: dev/prod and DB-vs-intended drift silently" - ], - "anchor": "a teammate asked for a deploy preview of the June 7 config state after a behavior regression. cfg must make 'restore whole system to June 7' easy (emergent system restore)." - }, - "people": { - "Maintainer": "backend+ML", - "a teammate": "applied AI, edits configs", - "another teammate": "applied AI, edits configs", - "the PM": "manager + PM, needs visibility (read-only/log surfaces matter)" - }, - "decisions_made_do_not_relitigate": [ - "Keep instant/no-deploy; gating adds a human checkpoint on prod, never latency", - "Asymmetric gating: dev ungated (95% of edits), prod gated via out-of-band human approval; org-level prod-cred lockdown is the real-world backstop (out of tool scope)", - "Runtime store stays dumb (current docs only); versioning in tool-owned config_history + config_heads; emergent via everyone using the tool", - "Storage-agnostic: core depends only on StorageAdapter + ApprovalProvider seams; Mongo first, Postgres = one adapter class; CI gate forbids DB driver / LLM SDK in core", - "Full-doc-per-version (no diff storage); bitemporal (recorded_at vs valid_from/valid_to)", - "Real CAS via config_heads HEAD pointer + atomic apply() that also CAS-checks live doc (expected_live_oid) so raw bypass fails closed", - "Agents propose/preview anything; applying to gated env always crosses a human (no agent-suppliable flag, no cfg.approve MCP tool)", - "License Apache-2.0; git command vocabulary reused as design idea only (no git source copied); semantic-diff prior art cited (CREDITS.md)" - ], - "rejected": ["building a general SaaS product", "branching/merging", "RBAC in tool", "output/vision evals", "diff storage"], - "differentiator": "System-impact analysis (SPEC §7): given a config change, report its NATURE and CONSEQUENCES across the whole agent graph (downstream contract breaks, cross-config conflicts, blast radius). Structural facts computed LOCALLY (no LLM, no egress); optional LLM narrates the why. Off-the-shelf tools diff a prompt string only.", - "scaffold": { - "build_contract": "docs/SPEC.md (v0.3.2)", - "key_starting_file": "src/cfg/adapters/base.py — full StorageAdapter Protocol (21 methods) + error types mapped to exit codes; implement MongoAdapter against it first", - "layout": { - "docs/SPEC.md": "spec (build contract)", - "docs/SPEC_v0.1.md": "original v0.1 (historical)", - "src/cfg/core/": "engine (NO db driver / NO llm sdk) — hashing/asof/engine/refs TO BUILD", - "src/cfg/adapters/base.py": "StorageAdapter Protocol stub (full surface)", - "src/cfg/approval/base.py": "ApprovalProvider Protocol stub", - "src/cfg/cli/main.py": "CLI entrypoint stub", - "src/cfg/mcp/": "MCP server TO BUILD", - "plugins/cfg_impact/": "impact plugin (kept OUT of core for license boundary)", - "examples/.cfg.toml": "real aistudio agent_configs shape, ready to copy", - "tests/test_core_purity.py": "enforces SPEC §1 (no DB/LLM import in core); RUNS, passes", - "LICENSE": "Apache-2.0", - "CREDITS.md / NOTICE": "attribution (git=idea-only; semantic-diff prior art)" - }, - "verify_runs": [ - "cd && PYTHONPATH=src python3 -c \"from cfg.adapters.base import StorageAdapter; print('ok')\"", - "pip install -e '.[dev]' && PYTHONPATH=src python3 -m pytest tests/ -q" - ] - }, - "build_order_spec_section_17": [ - "Stage 1: Core + MongoAdapter (replica-set) + schema (+valid_to) + co-location/txn checks + hashing + init(+invariant+atomicity-scope) + import + put_config/seed_config + status + commit(dual-CAS) + log(interval as-of) + show + diff", - "Stage 2: restore single + system (valid-time + --tag + --resume/plan_oid) + --dry-run + --preview(minimal) + activate + tag/points + fsck", - "Stage 3: approval flow (local then slack) + approve/deny/approvals + exit-4/7 split + gating/secret pre-flight + redact(+--rewrite-history) + force-routing", - "Stage 4: agent surface --json -> MCP (envelope incl cfg.impact) -> Claude skill", - "Stage 5: cfg apply-doc shim + migrate 19 seed scripts + CI gate + cron-adopt + refs check", - "Stage 6: LLM system-impact plugin (egress-controlled, local structural core) + full preview deploy + PostgresAdapter (proves the seam)" - ], - "hard_constraints_non_negotiable": [ - "NEVER write to prod DB in any condition; reads only on prod; all mutations to local or remote DEV; ask first if a prod write seems needed", - "During dev work use .env Mongo URI; local is the default; don't ask which env unless prod explicitly requested in the turn", - "Before any heavy/risky PROD collection mod (later, with permission), back up the affected docs first", - "No em-dashes in any user-facing text/docs/comments", - "Copyable text (commands/prompts) must be bare: no code-fence/quote wrappers, no lead-in line", - "Don't use vision/output evals; eval tool-calls, payloads, reasoning", - "Do not copy git source code (license boundary); reuse only design vocabulary", - "Do not put a DB driver or LLM SDK in src/cfg/core/ (CI gate fails)" - ], - "environment_gotchas": { - "local_mongo": "localhost:27017 is DOWN on this machine; project env/ venv + pymongo gone. Prior session used mongosh against remote managed Mongo, READ-ONLY, appdb-dev.", - "remote_cluster": "hosts both appdb (PROD) and appdb-dev (DEV). Touch only appdb-dev for writes.", - "to_build_cfg": "set up fresh venv + local Mongo REPLICA SET (single-node RS is fine and REQUIRED for transactions; apply() needs a txn; standalone Mongo makes cfg refuse gated mutations by design)." - }, - "hardening_history": { - "rounds": 4, - "v0.1": "20 defects (worst: is_active multi-doc, illusory CAS, uni-temporal as-of)", - "v0.2->v0.3": "6 write-path Tier-1 (txn co-location, put/seed_config, is_active flip verb, bypass-not-caught-by-CAS, hand-wavy valid-time, redact-vs-oid) + Tier-2s", - "v0.3->v0.3.1": "4 residual Tier-1 (put_config self-drift, import interval overlap, activate missing live-CAS, system-impact neighbor egress leak)", - "v0.3.1->v0.3.2": "1 Tier-1 from exit gate (strip_on_store missing from field-set invariant + hash strip) + 2 cosmetic; all patched; GREEN", - "confirmed_holding": ["HEAD-pointer CAS", "out-of-band human approval / agent-safety", "read-side runtime_filter keying", "dual-CAS", "redact-vs-oid (git linkage survives: trailers key on seq not oid)", "June-7 valid-time case"] - }, - "do_not": [ - "Start implementation before the FIRST TASK (read backend code) and confirming spec vs reality", - "Write to prod Mongo ever without explicit per-turn permission + backup", - "Redo the spec from scratch (hardened 4x); improve via small marked edits + changelog if code contradicts it" - ] -} diff --git a/docs/project-notes/handoff.md b/docs/project-notes/handoff.md deleted file mode 100644 index 1e4d368..0000000 --- a/docs/project-notes/handoff.md +++ /dev/null @@ -1,178 +0,0 @@ -# cfg / cfgit: Session Handoff (for a fresh Claude Code session to take over) - -> **You (the next Claude Code session) are taking over the `cfgit` project (CLI command `cfg`).** This file is your complete brief. Read it top to bottom, THEN do the "FIRST TASK" below (read the real backend code) before touching the build. - -> ### SCOPE WAS REFRAMED — read `docs/SPEC_CORE.md` BEFORE this file's older sections. -> After this handoff was first written, the project was reframed (the user explored the prior art and landed on a sharper positioning). The current truth: -> - **cfgit is NOT "config version control" and NOT "general database git."** It is **non-custodial version control for live datastores**: git for a database you already have and cannot migrate, versioning records *in place*, without owning the data/reads/writes. Tagline: "git that doesn't make you move in." This is the defensible niche; "general DB git" invites comparison to **Dolt** (which owns the store) and loses. -> - **The differentiator is DRIFT RECONCILIATION** (`status` detects out-of-band writes, `adopt` folds them into history, `commit` refuses to clobber un-adopted drift), because the team writes the DB from many paths and will NOT lock down creds (flat, equally-accountable startup). Every existing tool avoids this because nothing bypasses them; cfgit needs it. -> - **v1 is a NARROW feature set on a GENERAL engine, with TWO adapters (Mongo + Postgres)** to prove "any DB." Build: `init`/`status`/`diff`/`commit`/`log`/`adopt`/`restore` (single + system)/`tag`/`fsck`, plus optional engine-level mutation permissions for admin-only high-blast-radius actions. CLI + JSON are the base surface; UI/MCP/skill are thin wrappers over the same actions. -> - **CURRENT IMPLEMENTATION UPDATE:** CLI + JSON now has a localhost UI, MCP server, portable skill, and an optional `cfgit-impact` plugin. Out-of-band hosted approval remains deferred. The impact provider boundary lives only in `plugins/cfg_impact`, never in `src/cfg/core`. -> - **`docs/SPEC_CORE.md` is now the authority on framing + v1 scope.** `docs/SPEC.md` (v0.3.2) is the deep engine reference for the parts v1 builds; where they differ on scope, SPEC_CORE wins. Sections of THIS handoff written before the reframe (esp. §1's "config" emphasis and §2's approval-centric decisions) are superseded by SPEC_CORE where they conflict; the backend facts, constraints, env gotchas, and scaffold notes below remain accurate. -> - Commercial value is a **later discovery, not a reason to build**; assume internal-grade. The real gate is a **one-week usage test**: if the team reaches for `cfg` over raw writes, continue; if they route around it, stop and reassess. - -Everything here was produced in prior sessions that designed and pressure-tested the engine; your job is to understand the real problem against the actual codebase, confirm the spec matches reality, and then implement the v1 scope in SPEC_CORE. - ---- - -## 0. FIRST TASK (do this before anything else: do NOT skip to coding) - -The spec describes a real production system secondhand. Before implementing, **read the actual code** so you understand the problem firsthand and can catch any place the spec drifted from reality. Open and read, in this order: - -1. `/app/services/agentic_v2/persona_loader.py` - - Line ~92: `db["agent_configs"].find_one({"config_id": config_id, "is_active": True})`: this is the **runtime-authoritative read**. The app reads agent configs from Mongo live, per build, no cache, no deploy. THIS is why DB writes change agent behavior instantly, and why versioning has to live beside the runtime store without changing it. Confirm: is there really no cache? Is `is_active:True` always the selector? Are there other readers of `agent_configs`? -2. `/scripts/seed_agent_planner_shape.py` (and skim 3-4 other `seed_*.py`) - - This is the **de-facto prototype** of what `cfg` replaces: `find_one({config_id, is_active:True})` → diff/modify owned fields → `update_one`, prompt kept in a sibling `.txt`, idempotent, `return 2` on env-missing. Note the pattern; note the `return 2` exit-code collision the spec calls out. - - There are **19** such scripts in `scripts/` (`seed_*.py` + `backfill_*.py`). They are the daily raw-Mongo workflow and the **adoption blocker**: `cfg` must absorb this path (the spec's `cfg apply-doc` shim + migration, §13). -3. `/.env` - - Keys (names): `MONGODB_URI`, `MONGODB_DB_NAME` (=`appdb-dev`), `LLM_PROVIDER_*`, etc. NOTE: on the current machine local Mongo (`localhost:27017`) is dead and the `env/` venv is gone: see §5 "Environment gotchas." There is **no** PREVIEW or PROD Mongo URI in `.env` yet; the spec requires distinct `DEV_/PREVIEW_/PROD_MONGODB_URI` (§8, §10). -4. Inspect the real doc shapes (read-only). Use `mongosh` against dev (NOT prod). The prod/dev managed Mongo cluster hosts both `appdb` (prod) and `appdb-dev` (dev). - - `agent_configs`: a real `agent_planner` doc has ~30 fields: `instructions` (~28k chars), `client_config`{provider, model, temperature}, `tools`[7], `skills`, `fallback_models`, `prompt_templates`, `phase_contract`, `default_options`{max_tokens}, `timeout_s`, `version`(never bumped), `updated_by`, `metrics`, PLUS polluting `instructions_backup_` keys (≈15 across 5 configs: the manual versioning `cfg` replaces). Id = `config_id`; live-rule = `is_active:true`. - - **`modelgarden_models`** (the SECOND v1 collection): ~221 docs controlling provider routing, with fields like `enabled`, `active`, `jobrouter_enabled`, `retry_policy`, `provider_config`, `inputs`, `custom_input_schema`, pricing/timeouts; only `updated_at`/`updated_by` as history-ish fields. A prior session found duplicate `model_id` values in the data. Follow-up code/data inspection found runtime and admin config paths key models by `model_path`, and local dev data has unique `model_path` values. Id = `model_path`; live-rule = `{}` unless production data proves otherwise. It is written from 20+ code paths (services, endpoints, seed scripts), not just seed scripts. - - **PER-COLLECTION GATE (do this before building):** for EACH of the two collections, pin against real data: (a) the true stable id, (b) the "which one is live" rule (`live_when`), (c) whether the id is actually unique today. For `modelgarden_models`, current evidence points to `model_path`, not `model_id`. Getting this wrong makes the tool wrong on day one (see SPEC_CORE §9). - -**After reading**: write a short `FINDINGS.md` confirming SPEC_CORE's assumptions hold against the real code/data, with the per-collection id + live-rule decided for both collections, and flag ANY mismatch (a second reader of either collection, a cache, a genuinely-multi-live id). THEN proceed to build per **SPEC_CORE §11** (NOT SPEC.md §17, which is the older config-only order). - -**Hard constraints while doing this (NON-NEGOTIABLE, carried from the user):** -- **NEVER write to prod DB in any condition.** Reads only against prod; all mutations go to local or remote DEV. Ask first if a prod write ever seems needed. -- During dev work, **use the `.env` Mongo URI**; local is the default: don't ask which env unless prod is explicitly requested. -- Before any heavy/risky PROD collection mod (later, with permission), **back up the affected docs first**. -- **No em-dashes** in any user-facing text/docs/comments. Copyable text (commands/prompts) must be **bare** (no code-fence or quote wrappers, no lead-in line). -- Don't use vision/output evals; eval tool-calls, payloads, reasoning. - ---- - -## 1. What `cfg` is (the one-paragraph problem) - -A company runs an AI video-studio backend whose LLM agent configs live in a MongoDB `agent_configs` collection and are **runtime-authoritative**: the app reads them live (`persona_loader.py:92`), so a DB edit changes agent behavior instantly with no deploy. A small backend team edit these configs via **raw Mongo writes through Claude Code scripts**. That causes three pains: -1. **Silent overwrites**: no concurrency control; one dev's write clobbers another's (lost update). -2. **No history / rollback**: devs hand-paste `instructions_backup_` keys into the docs (≈15 across 5 configs); there is no real version history. -3. **Invisible drift**: dev vs prod, and DB-vs-what-was-intended, drift silently. - -`cfg` is a **git-shaped version-control tool** for these DB-resident config docs. It keeps the runtime store "dumb" (current docs only) and puts versioning in a **tool-owned `config_history` + `config_heads`** collection. Versioning is an **emergent property** of the process: everyone uses the tool instead of raw writes. Humans use a CLI (`cfg commit`/`log`/`diff`/`restore`); **AI agents use an MCP server + Claude skill** (this is a first-class requirement: Claude/Codex must operate it "like a charm"). A standout capability: **system-impact analysis**: given a config change, report its *nature* and *consequences across the whole agent graph* (downstream contract breaks, cross-config conflicts, blast radius), computed locally with an optional LLM narrator. - -The anchoring real-world ask: a teammate needed a deploy preview of the June 7 config state after a behavior regression. `cfg` must make "put the whole system back to how it was on June 7" easy (the emergent **system restore**). - ---- - -## 2. Key architecture decisions (already made: do not re-litigate) - -- **Keep instant/no-deploy behavior.** Gating adds a human checkpoint on prod, never latency; propagation stays instant in all envs. -- **Asymmetric gating:** dev ungated (95% of edits, matches today's speed); prod gated via out-of-band human approval. The load-bearing real-world control is org-level: lock down raw *prod* write creds so the cfg service identity is effectively the only prod writer (stated, out of tool scope but important). -- **Runtime store stays dumb.** All versioning lives in `config_history` (immutable entries) + `config_heads` (the CAS pointer). Hand someone the runtime collection alone and they couldn't tell versioning exists. -- **Storage-agnostic core.** Core depends ONLY on a `StorageAdapter` seam + an `ApprovalProvider` seam. Mongo first; Postgres later = one adapter class, zero core change. **CI gate: core imports no DB driver and no LLM SDK** (enforced by `tests/test_core_purity.py`, already written). -- **Full-doc-per-version** (no diff storage). Trivial at this scale; restore = copy; zero replay risk. -- **Bitemporal** model: `recorded_at` (transaction time) vs `valid_from`/`valid_to` (valid time): required so restores (old content stamped now) and backdated imports answer "what was live at T" correctly. -- **Real CAS** via a `config_heads` HEAD pointer (NOT seq-uniqueness) + an atomic `apply()` that also CAS-checks the live doc (`expected_live_oid`) so a raw-Mongo bypass fails closed instead of being clobbered. -- **Agents can propose/preview anything but applying to a gated env always crosses a human** (ApprovalProvider, out of band; no agent-suppliable flag, no `cfg.approve` MCP tool). -- **License:** Apache-2.0. Git's command vocabulary/porcelain-plumbing split is reused as *design ideas only* (GPL-2.0 binds copied code, not ideas; no Git source copied). Semantic-diff prior art is cited. See `CREDITS.md`. - -What was explicitly REJECTED: building a brand-new general SaaS product (off-the-shelf prompt tools are prompt-string-centric and fight the fat-Mongo-doc shape, but we're not trying to out-build them); branching/merging; RBAC inside the tool; output/vision evals; storing diffs. - ---- - -## 3. THE SPEC: two docs, SPEC_CORE wins on scope - -- **`docs/SPEC_CORE.md`** = framing + **v1 scope + build order** (the authority for what to build). Read it first. -- **`docs/SPEC.md` (v0.3.2)** = deep engine reference for the parts v1 builds. Structure: §0 glossary · §1 architecture (3 layers + CI boundary) · §2 StorageAdapter (the one atomic `apply()`, dual-CAS, intervals, reconcile, co-location check) · §3 history schema (entry + HEAD pointer) · §4 hashing + Field-set invariant · §5 commands · §6 preview · §7 LLM impact (DEFERRED) · §8 `.cfg.toml` · §9 secrets · §10 gating (DEFERRED) · §11 approval (DEFERRED) · §12 agent/MCP (DEFERRED) · §13 adoption · §14 edge-cases · §15 non-goals · §16/§18/§20 changelogs · §17 (old config-only build order, SUPERSEDED by SPEC_CORE §11) · §19 attribution. - -**v1 BUILD ORDER = `docs/SPEC_CORE.md` §11** (NOT SPEC.md §17). In short: -1. Core engine + history schema + hashing + **drift detection** (depends only on StorageAdapter; no DB driver; opaque records keyed by stable id). -2. **MongoAdapter** → `init` (validate per-collection id + live-rule against real data) → `status` (drift is the headline) → `diff` → `commit` (refuses to clobber un-adopted drift) → `log` → **`adopt`** (fold out-of-band writes into history; the differentiator). -3. `restore` (single + **system / "back to June 7"**) + `tag` + `fsck` + optional mutation permissions. -4. **PostgresAdapter** (proves the DB-neutral seam; Postgres ACID backs every guarantee cleanly). -5. Point at the two origin collections (`agent_configs` by `config_id`, `modelgarden_models` by `model_path`) and run the **one-week usage test**. - -**v1 implementation correction found by smoke test:** `oid` is a content hash and can repeat when a restore re-applies old content. Do NOT make `(env, collection, record_id, oid)` unique. Use unique `(env, collection, record_id, seq)` for entry identity, store `head_seq` for exact HEAD lookup, and keep `head_oid` for dirty detection. - -**Postgres v1 contract:** the Postgres adapter expects each live table to expose an id column, optional `live_when` scalar columns, and a `doc jsonb` column with the full versioned record. It is a deliberate proof of the DB-neutral opaque-record engine, not a relational-schema diff tool. - -**DEFERRED (build only on a real incident / real adoption):** out-of-band approval (SPEC §10-§11), because the flat team won't gate peers by default; the agent/MCP surface (SPEC §12), after CLI core is used; the LLM system-impact layer (SPEC §7), config-specific and worth validating with a throwaway script first; config-specific furniture `--strip-backups` and `is_active` activation move to plugins, not core. CLI + JSON only; no UI; no branch/merge in v1. The v1 permission gate is local author authorization at the engine mutation boundary, not a hosted approval system. - ---- - -## 4. The scaffold (already created: extend it, don't recreate) - -The project is a **standalone git repo at `/`** (OUTSIDE the backend-repo repo, by design; 2 commits on `main`). Layout: -``` -docs/SPEC.md the spec (v0.3.1): the build contract -docs/SPEC_v0.1.md the original v0.1 (historical, shows what the teardown found) -README.md project intro -LICENSE Apache-2.0 -NOTICE, CREDITS.md attribution (git=idea-only; semantic-diff prior art cited) -pyproject.toml cfgit package; extras: mongo/cli/mcp/impact/dev; `cfg` entrypoint -.gitignore -src/cfg/__init__.py -src/cfg/core/ engine (no DB driver / no LLM SDK): hashing/asof/engine/refs TO BUILD -src/cfg/adapters/base.py StorageAdapter Protocol (21 methods): STUB with full v0.3 surface -src/cfg/adapters/__init__.py -src/cfg/approval/base.py ApprovalProvider Protocol: STUB -src/cfg/approval/__init__.py -src/cfg/cli/main.py CLI entrypoint: STUB (prints "spec stage") -src/cfg/mcp/__init__.py MCP server: docstring only, TO BUILD -plugins/cfg_impact/ impact plugin (kept OUT of core for license boundary): README only -examples/.cfg.toml real aistudio agent_configs shape, ready to copy -tests/test_core_purity.py enforces §1 (no DB/LLM import in core): RUNS, passes -``` -`src/cfg/adapters/base.py` is the most useful starting point: it already declares the full `StorageAdapter` contract (`get_config`/`put_config`/`seed_config`/`activate_config`/`apply` with `expected_head_oid`+`expected_live_oid`/`query_history`/`redact_field`/`reconcile`/`check_atomicity_scope`/...) plus the error types (`StaleHead`/`StaleLive`/`AmbiguousConfig`/`NoSuchConfig`) mapped to exit codes. Implement `MongoAdapter` against this Protocol first. - -Verify the scaffold runs: -- `cd && PYTHONPATH=src python3 -c "from cfg.adapters.base import StorageAdapter; print('ok')"` -- `PYTHONPATH=src python3 -m pytest tests/ -q` (after `pip install -e '.[dev]'`) - ---- - -## 5. Environment gotchas (this machine) - -- **Local Mongo is dead** (`localhost:27017` not running) and the project `env/` venv + pymongo are gone on the current machine. For schema inspection during the prior session we used `mongosh` (available) against the remote managed Mongo cluster, READ-ONLY, against `appdb-dev`. -- The prod/dev managed Mongo cluster hosts both DBs. **Prod is `appdb`; dev is `appdb-dev`.** Touch only `appdb-dev` for any write; prod is read-only and only when explicitly needed. -- The user's global rule: standalone migration/db scripts must use the `.env` Mongo URI. -- To build/run `cfg` you'll likely set up a fresh venv and a local Mongo **replica set** (single-node RS is fine and is REQUIRED for transactions: `apply()` needs a txn; standalone Mongo makes `cfg` refuse gated mutations by design). `examples/.cfg.toml` shows the expected env var names (`DEV_/PROD_/PREVIEW_MONGODB_URI`). - ---- - -## 6. How the spec was hardened (so you trust it) - -The spec went through **three adversarial review rounds**, each revise→attack→revise: -- v0.1 → **20 defects** found. Worst three (all fixed): multi-doc-per-config_id (`is_active`) broke the one-doc model; the CAS was illusory (seq-uniqueness allows lost update); `as-of` was uni-temporal so "reproduce June 7" silently returned wrong state. -- v0.2 → second review found **6 write-path Tier-1 defects + Tier-2s** (all fixed in v0.3): txn co-location (atomicity silently false across clusters), `put_config`/`seed_config` undefined, no `is_active`-flip verb, bypass-not-caught-by-CAS, hand-wavy valid-time, redact-vs-oid identity break. -- v0.3 → third review found **4 residual Tier-1 edges** (all fixed in v0.3.1): `put_config` self-induced drift (full-doc/field-set invariant), backdated-import interval overlap (proper split), `cfg activate` missing the live-CAS, system-wide impact egress leaking a non-allowlisted neighbor's text. -- v0.3.1 → final exit-gate check found **one** contained Tier-1 item (`strip_on_store` omitted from the new field-set invariant + hash strip), now patched in **v0.3.2**. **Exit-gate is GREEN; spec is build-ready.** - -Confirmed-holding across rounds (do not second-guess unless the code says otherwise): the HEAD-pointer CAS, the out-of-band human-approval/agent-safety model (strongest part), the read-side `runtime_filter` keying, the dual-CAS, redact-vs-oid (git linkage survives because trailers key on `seq` not `oid`), and the June-7 valid-time case. - ---- - -## 7. The people / context (for tone + the impact-layer edges) - -- **Maintainer**: backend + ML. **Two teammates**: applied AI team, also edit configs. **PM**: needs visibility, so read-only and log surfaces matter. -- The configs form a real graph: `agent_planner` (the ~28k-char planner), shape/fill split personas, shot-breakdown persona, intent classifiers, audio/TTS configs, etc. The impact layer's edge model (`phase_contract`, shared `tools`, `prompt_templates`, `fallback_models`, `skills`) is wired in `examples/.cfg.toml [impact].edge_fields`: but you should VALIDATE those field names against the real docs when you build §7. - ---- - -## 8. What NOT to do - -- Do **not** start implementation until you've done the §0 FIRST TASK (read the backend code) and confirmed the spec vs reality. -- Do **not** write to prod Mongo, ever, without explicit per-turn permission + a backup first. -- Do **not** copy Git source code (license boundary); reuse only the design vocabulary. -- Do **not** add output/vision evals to the impact layer. -- Do **not** put a DB driver or LLM SDK in `src/cfg/core/` (CI gate fails). -- Do **not** re-do the spec from scratch; it's been hardened three times. Improve it where the real code contradicts it, via small marked edits + a changelog entry. - ---- - -## 9. Exit-gate verdict (the one open item from the prior session) - -A final verification subagent checked whether v0.3.1's four Tier-1 fixes fully close their defects with no new Tier-0/1 hole. **Result: GREEN.** Three of the four closed cleanly; Tier-0 none; the fourth (`put_config` field-set invariant) had ONE contained Tier-1 omission: `strip_on_store` was missing from the field-set whitelist and the hash-time strip, which would falsely flag secret fields and break the dirty test for secret-bearing configs. **That has been patched in v0.3.2** (§4 + §2.1; see §20 changelog). The spec phase is therefore **DONE**: the spec is build-ready for Stage-1. Proceed to the build only after the §0 FIRST TASK (read the real backend code). There are no remaining Tier-0/Tier-1 defects. - ---- - -## 9b. Naming convention (carried decision: keep it) -`docs/SPEC.md` is in **two layers**: **Part 1 (Using cfg)** = plain language for users + agents; **Part 2 (Internals)** = the technical spec. The rule the user set: **anything a person or agent types, reads, or edits is human-readable; engine internals stay technical.** Concretely: the `.cfg.toml` keys are plain (`live_collection`, `live_when`, `secret_fields`, `[connections].links`, `needs_approval`, `confirm_word`, ...), and the MCP wire statuses are plain (`changed_outside_cfg`, `needs_human_ok`, `was_declined`, `bad_config`). Internals keep technical names (`oid`, `recorded_at`/`valid_from`, `StorageAdapter`, `expected_live_oid`, the CAS). The 1:1 mapping between the two is in the "Naming note for the implementer" right after Part 1, and MUST live authoritatively in ONE place in code: the **config loader** (reads friendly keys → internal names) and the **MCP envelope serializer** (internal status → plain wire word). When you build, do NOT leak `edge_fields`/`runtime_filter`/`dirty` into anything a user sees; do NOT bother renaming internals. `examples/.cfg.toml` already uses the plain keys: keep it the source of truth for the user-facing shape. - -## 10. TL;DR for you, the next session -1. Read this file + `docs/SPEC.md`: **Part 1 first** (plain usage), then Part 2 (internals) when you build. -2. Do the **§0 FIRST TASK**: read `persona_loader.py`, the `seed_*` scripts, `.env`, and the real `agent_configs` shape (dev, read-only). Confirm/flag spec-vs-reality. Write `FINDINGS.md`. -3. Set up a local Mongo **replica set** + venv; `pip install -e '.[dev,mongo,cli]'`; confirm `pytest tests/` passes. -4. Build **Stage 1** (§17): `MongoAdapter` against `src/cfg/adapters/base.py`, then `init`/`import`/`status`/`commit`(dual-CAS)/`log`/`diff`. Keep it usable end of stage. -5. Honor every constraint in §0 and §8. diff --git a/docs/project-notes/review-findings.md b/docs/project-notes/review-findings.md deleted file mode 100644 index a64d6fc..0000000 --- a/docs/project-notes/review-findings.md +++ /dev/null @@ -1,153 +0,0 @@ -# cfgit Implementation Review — Findings & Defects - -Date: 2026-06-21. Source: four parallel adversarial reviewers reading the actual code -(concurrency/drift, history/restore, adapter parity, security). This is a defect list -for a follow-up session to fix. Nothing here is editorialized: each item has a -file:line and a concrete fix. Severities: **H** blocks handing to an external team, -**M** real bug to fix before relying on it, **L** polish/correctness-hygiene. - -## What is already correct (do NOT "fix" these) -- **Dual-CAS is real and atomic** on both Mongo and Postgres: the live-doc re-hash - happens INSIDE the transaction (`mongo.py:173-178`, `postgres.py:211-217`), so a raw - bypass is caught as `StaleLive`, not clobbered. Concurrent commits serialize via the - txn + unique `(env,collection,record_id,seq)` index. -- **Hashing is correct** (`hashing.py`): sorted keys, NFC, null==missing, int/float - equal, NaN/Inf rejected; `ignore_fields`/`ignore_patterns`/`ignore_paths`/`secret_fields` - all stripped before hashing; secrets also stripped from the stored doc. -- **adopt is correct**: folds live as a new version, `expected_live_oid=None` (waives - live-CAS) but keeps head-CAS, idempotent no-op when already clean (`engine.py:97-123`). -- **Valid-time "back to June 7" math is real**: true `[valid_from, valid_to)` interval - containment (`valid_from <= T < valid_to`), not a `recorded_at` approximation; returns - exactly one version (`mongo.py:98-100`, `postgres.py:143-146`). Single-record restore - is non-destructive by construction (restore re-applies old content as a new seq; - identical oid across seqs is expected and handled by the unique key being seq, not oid). -- **Authz is engine-enforced**, not bypassable via MCP/UI/direct engine calls - (`engine.py:_authorize` called first in every mutator). writers vs admins + admin_actions - (e.g. `restore_system`) enforced (`authz.py`). -- **LLM SDK isolation is excellent**: all vendor code in `plugins/cfg_impact/` only; - two CI gates (`test_core_purity.py`, `test_impact_boundary.py`); clean provider - base+factory pattern. Core stays DB-free and LLM-free. - ---- - -## HIGH (blockers for external handoff) - -### H0 — `cfg status`/`log` CRASH on Mongo: `_history_row` is undefined in the Mongo adapter -- **Where:** `src/cfg/adapters/mongo.py:121` calls `_history_row(row, with_doc=with_doc)`, but `_history_row` is defined ONLY in `src/cfg/adapters/postgres.py:503` (never defined in or imported into `mongo.py`). -- **Found by:** the third-party clean-room sim (a fresh user pointing cfgit at their own Mongo). `init` and `import --all` succeed, then the very next command, `cfg status`, dies with `NameError: name '_history_row' is not defined`. This blocks core functionality on Mongo — the DEFAULT backend — for any path that reads history (status, log, system restore, tag listing). Introduced in commit `b042fe1` ("Initial public cfgit release"). -- **Severity:** Highest practical severity — the tool is unusable past `import` on Mongo out of the box. (The unit suite missed it because no test exercises `query_history` against a live Mongo; see L9.) -- **Fix:** Define a Mongo `_history_row` normalizer (mirroring `postgres.py:503`) that strips the Mongo `_id`/storage internals and returns the same JSON-safe row shape `{collection, record_id, seq, oid, parent_oid, op, valid_from, valid_to, author, message, recorded_at, [doc]}`, and use it at `mongo.py:121`. This simultaneously fixes L1 (non-JSON-safe ObjectId in Mongo output). Add a live-Mongo `query_history` test (L9) so this can't regress. - - -### H1 — Secret pre-flight deny-list is completely unimplemented -- **Where:** `src/cfg/core/config.py:81-161` (never parses `[secrets]`), `src/cfg/core/engine.py:126-150` (`commit` does no scan), `src/cfg/cli/main.py:109-112` (no `--allow-secret`). -- **Problem:** The example config ships `[secrets]` with `block_fields`/`block_values`/`on_match="refuse"` (`examples/.cfg.toml:31-34`) and SPEC §9 mandates a deny-list scan on every commit. None of it exists in code (grep for `block_fields|block_values|on_match|allow_secret|SecretsConfig` = zero hits). A secret-shaped value (`sk-...`, AWS key) pasted into any field NOT enumerated in `secret_fields` is committed into `config_history` permanently, no refusal, no audit. -- **Fix:** Add a `SecretsConfig` dataclass (`deny_field_globs`, `deny_value_regex`, `on_match`), parse it in `load_config`, and scan `new_doc` in `Engine.commit` (and future `set --commit`) BEFORE `_entry`/`apply`. On match with `on_match="refuse"`, raise (exit 1) unless `allow_secret=True` threaded from a new `--allow-secret` CLI/MCP flag, recording `meta.allow_secret`, author, reason in the entry. - -### H2 — No `share_with_ai` egress allowlist and no consent line on the LLM path -- **Where:** `src/cfg/core/config.py:49,158` (`share_with_ai` parsed but ZERO readers), `plugins/cfg_impact/cfg_impact/overview.py:73-84` (sends whenever `use_llm=True`). -- **Problem:** SPEC §7.5 requires egress to be "off by default; opt-in PER CONFIG" via `share_with_ai`, plus a consent/log line naming the provider and which record_ids' text will be sent. Neither exists. `--llm` ships full (diff-stripped) config text to Anthropic/OpenAI with no gate and no notice. Also `_overview_prompt_payload` (`overview.py:233-238`) has an inverted filter (`key not in {"changes"} or len(value) <= 40`) that KEEPS the full `changes` array (old/new field values) in the common small-diff case. -- **Fix:** Before building/sending the payload, gate the changed record's `record_id` against `engine.config.connections.share_with_ai`; if not allowlisted, return structure-only (no prose, no `changes` text). Emit a one-time-per-session consent line naming provider + exact record_ids before `llm.narrate(...)`. Fix the payload filter to drop `changes` entirely from the LLM payload (send only `changed_paths` + counts). Apply `strip_for_hash`/secret redaction at the egress boundary itself (defense-in-depth), not just upstream in `engine.diff` — `deterministic_overview` currently reads raw unstripped `=live` docs (`overview.py:35-36,131`). - -### H3 — System restore cannot restore deleted (history-only) records; one deletion blocks the whole restore -- **Where:** `src/cfg/core/engine.py:271-273` inside `_restore_many`. -- **Problem:** A target whose live doc is `None` is marked `missing`/`blocked` and ABORTS the entire system restore (`:302-303`); there is no `seed_record` path. SPEC §5.11 step 4 + §14 require restoring history-only records via `seed_record` with `--include-deleted` default-true for system restore. As-is, "back to June 7" is incomplete and is actively blocked if anything was deleted since the target moment. -- **Fix:** When `live is None` but the record has history and a target at T, restore via `adapter.seed_record(...)` (method exists in base Protocol, `base.py:64`) instead of treating it as a blocker. Thread `include_deleted` default-true for system restores. - -### H4 — Capability honesty is reported but never enforced; `check_atomicity_scope()` is a tautology -- **Where:** engine mutators never check capability (`engine.py:126-254`); `check_atomicity_scope()` (`mongo.py:285-293`) sets `runtime_cluster == history_cluster` from one client. -- **Problem:** SPEC §2 [FIX-3/V3-1] requires the engine to REFUSE mutating verbs on a non-transactional or non-co-located backend (and run a `pending`-intent fallback on ungated). The flag is computed honestly then ignored: on standalone Mongo, `apply` calls `start_transaction()` and throws a raw pymongo `OperationFailure` instead of a clean refusal, and there is NO non-atomic fallback (`list_pending`/`reconcile` are stubs: `mongo.py:247-251`, `postgres.py:300-303`). Separately, `check_atomicity_scope` cannot detect the cross-cluster split it exists to catch (a single `MongoClient`/`self.db` is used, so `runtime_cluster` and `history_cluster` are always equal) — the Tier-0 "never silently non-atomic" promise is unverified. -- **Fix:** (a) In a shared mutation precondition, call `check_atomicity_scope()`; if `not atomic`, raise a typed `AtomicityUnavailable` mapped to exit 3 with the spec's remedy text (refuse). (b) To make the check real, allow a separate runtime connection/URI in `.cfg.toml`, open it distinctly, and compare actual topology (replica-set name + seed hosts, or `hello().me`/host sets) of runtime vs history; set `atomic=False` with a reason naming both when they differ. Until (b), document that history/heads MUST be in the same DB as runtime (the code already assumes this). - ---- - -## MEDIUM - -### M1 — Interval close keyed on `oid` (the field the project declares non-unique) instead of `seq` -- **Where:** `src/cfg/adapters/mongo.py:193-203` (and the Postgres equivalent close step). -- **Problem:** The prior-interval close matches `{oid: current_head, valid_to: None}`. Because restore re-applies old content, an oid recurs across seqs; relying on `valid_to: None` alone to disambiguate is fragile and contradicts the project's own "oid is not unique" correction. A half-applied fallback or future path leaving two open intervals would corrupt as-of. -- **Fix:** Close by the unique key `(env, collection, record_id, seq=ptr["head_seq"])` — `apply` already holds `ptr["head_seq"]`. - -### M2 — Mid-apply system-restore failure leaves a half-restored system, no report, no resume -- **Where:** `src/cfg/core/engine.py:319-322`. -- **Problem:** The apply loop has no per-record try/except; a `StaleLive`/storage error on record N propagates after 1..N-1 are already committed. No machine-readable per-record report and no `restore_token`/`--resume` (required by SPEC §5.11/§14). (There is a safe all-or-nothing pre-check, so no half-restore from a drift block; the gap is a failure DURING apply.) -- **Fix:** Wrap each `_restore_one` in try/except, accumulate `restored`/`failed` per record, return a structured report + resume token; `--resume` re-applies only non-converged records (convergence already holds via the no-op skip). - -### M3 — Single-record valid-time ref `@{date}` is documented but crashes -- **Where:** `src/cfg/core/engine.py:336-337`. -- **Problem:** `@{2026-06-07}` hits the `@`-prefix branch and `int("{2026-06-07}")` raises ValueError (exit 1). README/USAGE/`core/__init__.py:12` advertise `@{date}` as valid-time. So `cfg diff @{date} =live`, `cfg show @{date}`, `cfg restore @{date}` all crash. Valid-time as-of works only for whole-system restore, not single-record refs. -- **Fix:** Add an `@{...}` branch in `resolve_ref` that parses the date (reuse `parse_when`) and calls `query_history(as_of_valid=..., limit=1)`. Until then, remove `@{date}` from the single-record docs. - -### M4 — `restore --tag` does not verify tag completeness; silently restores a partial moment -- **Where:** `src/cfg/core/engine.py:189-216`. -- **Problem:** Only tagged rows are restored; a configured record absent from the tag is silently omitted (SPEC §5.12 requires a warning). For a "known-good moment" button this can silently leave records at their drifted current state. -- **Fix:** Compute the set of configured/known records, compare against tagged keys, surface a `partial_tag` warning listing uncovered records before applying. - -### M5 — Datetime/Decimal128/Int64 canonicalization can false-positive drift on raw-Mongo-written docs -- **Where:** `src/cfg/core/hashing.py:84-93`. -- **Problem:** `_normalize` renders Python `datetime`→ISO `...Z` and `Decimal`/`float`→decimal strings, but Mongo returns BSON `Date` (ms precision), `Int64`, `Decimal128` depending on writer. A doc cfg committed (Python types) vs the same doc re-read after a raw `mongosh` write can canonicalize differently → false DIRTY / false StaleLive. Since drift detection is THE differentiator and these fields are common in configs, this matters. -- **Fix:** Normalize BSON types explicitly (handle `bson.Decimal128`, `bson.Int64`, truncate datetimes to millisecond UTC to match BSON resolution) before hashing; add round-trip tests through pymongo. - -### M6 — `seed_record` can insert a row that won't be found as live -- **Where:** `src/cfg/adapters/postgres.py:66-82` (uses `doc.get(key, configured_value)` per `live_when` key); `src/cfg/adapters/mongo.py:50-53` (inserts verbatim, same latent issue). -- **Problem:** If the seeded doc contains a `live_when` key with a value different from the configured live value (e.g. `is_active=false` while `live_when={is_active:true}`), the seed inserts a row that `get_record` won't see — it silently vanishes. SPEC §2 requires seed to leave exactly one runtime_filter match. -- **Fix:** On seed, force the `live_when` scalar columns (and Mongo doc keys) to the configured live values, or validate the doc satisfies `live_when` and reject otherwise. - -### M7 — Postgres `apply` has no transient-retry parity with Mongo -- **Where:** `src/cfg/adapters/postgres.py:184` (no retry) vs `src/cfg/adapters/mongo.py:137-151` (3-attempt retry on `TransientTransactionError`). -- **Problem:** Under concurrent commits to the same record, Postgres surfaces a raw serialization error (`40001`)/deadlock (`40P01`) where Mongo silently retries. Not a correctness break (both fail closed), but inconsistent resilience between the two "proven" stores. -- **Fix:** Wrap PG `apply` in an equivalent retry on `40001`/`40P01`. - ---- - -## LOW - -### L1 — Mongo `query_history` returns raw storage docs with non-JSON-safe `_id` -- **Where:** `src/cfg/adapters/mongo.py:73-115` vs `src/cfg/adapters/postgres.py` `_history_row` normalizer. -- **Problem:** Postgres normalizes rows to a clean dict; Mongo returns the storage doc as-is (includes ObjectId `_id`). `log`/`list` JSON output will contain a non-serializable ObjectId on Mongo and not on Postgres. -- **Fix:** Give Mongo a `_history_row`-equivalent projection so both adapters return identical, JSON-safe row shapes. - -### L2 — Heads CAS is not self-contained (relies on txn + unique index, not a conditional filter) -- **Where:** `src/cfg/adapters/mongo.py:209-225`. -- **Problem:** The heads `update_one` filter is keyed only on `(env, collection, record_id)`, not `head_oid: expected_head_oid`. Safe today via the surrounding txn + unique-seq index, but fragile defense-in-depth. -- **Fix:** Add `"head_oid": expected_head_oid` (or `{"$exists": False}` when None) to the filter and assert `matched_count == 1`, raising `StaleHead` otherwise — a true CAS that holds even outside a txn. - -### L3 — Engine accepts empty `-m` reason -- **Where:** `src/cfg/core/engine.py:375-407` (`_entry`) and all mutators; CLI requires the flag present but accepts `-m ""` (`main.py:133`). -- **Fix:** Validate non-empty (stripped) message in the engine mutators, independent of interface (SPEC §10 `require_reason`). - -### L4 — Config-key naming drift between SPEC §8, the loader, and the example -- **Where:** loader reads `ignore_patterns`/`ignore_paths`/`secret_fields` (`config.py:135-138`); SPEC §8 `[hash]` names them `ignore_globs`/`ignore_paths`/`strip_on_store`. Example matches the loader (friendly names), so no runtime bug, but the "authoritative 1:1 mapping in one place" promise is unmet. -- **Fix:** Reconcile loader to accept the §8 technical names (or update §8 to the friendly names) and document the alias map in one place. - -### L5 — `restore_system` dry-run skips authorization (info disclosure) -- **Where:** `src/cfg/core/engine.py:164-165,196-197`. -- **Problem:** Dry-run does no writes, but an unauthorized author can enumerate which records a system restore would touch and their seqs. -- **Fix:** Authorize a read-scoped role for dry-run too, or document as intentional. - -### L6 — Core-purity test doesn't cover non-core first-party packages -- **Where:** `tests/test_core_purity.py:13` scans only `src/cfg/core/`. -- **Problem:** `src/cfg/{approval,interfaces,mcp,ui}` are unguarded; clean now but a future driver/LLM import there wouldn't fail CI. -- **Fix:** Broaden the scan to all of `src/cfg/` except `src/cfg/adapters/`. - -### L7 — `tag`/system restore silently skip records with no HEAD -- **Where:** `src/cfg/core/engine.py:369-370`. -- **Problem:** A live-but-unversioned (`new`) record is excluded from a system tag with no notice, so a later `restore --tag` won't touch it. -- **Fix:** Warn/report skipped `new` records at tag time. - -### L8 — `git_shas` / `link_git_sha` / `activate_config` / `redact_field` from SPEC §2 are unimplemented on both adapters -- **Note:** `git_shas` is always written as `[]` (`engine.py:404`); the spec's git-linkage, activation, and redaction adapter methods don't exist in `base.py` or either adapter. Parity is preserved (equally absent), but the spec §2 surface is not the built surface. Fine for v1 (these are deferred), just recording the gap. - -### L9 — No integration test runs either adapter ("proven on two stores" is proven by inspection only) -- **Where:** only `test_core_purity` and `test_authz` exist in this area; no Mongo/Postgres contract test, no fixtures. -- **Fix:** Add one parametrized adapter-contract test (same assertions, both adapters) — the artifact that would actually substantiate the headline claim. - ---- - -## Suggested fix order (for the follow-up session) -1. **H1 secret pre-flight** (the embarrassing-in-public one: a forgotten api_key in history forever). -2. **H2 LLM egress allowlist + consent + payload filter.** -3. **H3 deleted-record system restore via seed_record** (makes "back to June 7" actually complete). -4. **H4 capability enforcement** (refuse on non-transactional backend; make `check_atomicity_scope` real). -5. **M1 close intervals by seq**, then M2-M7 as time allows. -6. **L9 add a real two-adapter contract test** to substantiate the parity claim, then the rest of the Lows. diff --git a/plugins/cfg_impact/README.md b/plugins/cfg_impact/README.md index 20ed4ee..c0c5104 100644 --- a/plugins/cfg_impact/README.md +++ b/plugins/cfg_impact/README.md @@ -4,6 +4,25 @@ Optional system-impact analysis for cfgit. This package is deliberately outside `src/cfg/core/`. The cfgit core stays LLM-free and provider-free; this plugin owns both deterministic impact analysis and optional LLM narration. +## Install + +Install cfgit with the database extras you use, then install the optional impact +plugin from PyPI into the same Python environment: + +- [`cfgit-impact`](https://pypi.org/project/cfgit-impact/) + +```bash +pip install 'cfgit[mongo,postgres,mcp]' +pip install cfgit-impact +``` + +For `pipx` installs: + +```bash +pipx install 'cfgit[mongo,postgres,mcp]' +pipx inject cfgit cfgit-impact +``` + ## Boundary - `src/cfg/core/` never imports this plugin.