@cli.command()
@click.option(
    "--open/--no-open",
    "open_browser",
    default=True,
    help="Open the graph in your browser after generating (default: on; --no-open for headless).",
)
@click.pass_context
@_with_kb_lock(exclusive=False)
def visualize(ctx, open_browser):
    """Render the wiki's [[wikilink]] graph as a self-contained interactive HTML page."""
    # NOTE: the docstring above doubles as the command's --help text, so it is kept short.
    #
    # Flow: locate the knowledge base -> build the link graph from <kb>/wiki ->
    # write <kb>/output/visualize/graph.html -> optionally open it in a browser.
    # Every early exit prints a hint and returns None.
    kb_dir = _find_kb_dir(ctx.obj.get("kb_dir_override"))
    if kb_dir is None:
        click.echo("No knowledge base found. Run `openkb init` first.")
        return

    # Imported inside the command so the module is only loaded when this command runs.
    from openkb import visualize as viz

    graph = viz.build_graph(kb_dir / "wiki")
    if not graph["nodes"]:
        click.echo("No wiki pages to visualize yet. Run `openkb add` first.")
        return

    out = kb_dir.joinpath("output", "visualize", "graph.html")
    out.parent.mkdir(parents=True, exist_ok=True)
    page = viz.render_html(graph)
    out.write_text(page, encoding="utf-8")
    click.echo(f"Graph written to {out} ({len(graph['nodes'])} nodes, {len(graph['edges'])} edges)")

    if not open_browser:
        return

    import webbrowser

    try:
        # resolve() so a relative --kb-dir still yields a valid file URI
        launched = webbrowser.open(out.resolve().as_uri())
    except Exception:
        # Best effort: the file is already written, so any failure while resolving
        # the path or launching the browser is reported as "not opened", not raised.
        launched = False
    if launched:
        return
    click.echo("(couldn't launch a browser — open the file above manually, or use --no-open)")
+
+
+ + +
openkbknowledge graph
+ +
+ 0nodes + · + 0edges +
+
+ +
+ +
+ scroll zoom   drag bg rotate   click inspect
+ drag node pull out & pin   dbl-click release +
+ +
# Singular display type per content dir; falls back to a derived name for any
# dir not listed (so a new PAGE_CONTENT_DIRS entry never KeyErrors here).
_DIR_TYPE = {"summaries": "Summary", "concepts": "Concept", "entities": "Entity"}


def _type_for_dir(sub: str) -> str:
    """Return the display type used for pages stored in content directory *sub*.

    Known directories come from ``_DIR_TYPE``. Any other name is singularised by
    dropping one trailing "s" and then capitalised; if that leaves an empty
    string (e.g. ``sub == "s"``), *sub* itself is returned unchanged.
    """
    known = _DIR_TYPE.get(sub)
    if known:
        return known
    singular = sub[:-1] if sub.endswith("s") else sub
    return singular.capitalize() or sub


def build_graph(wiki_dir: Path) -> dict:
    """Collect nodes (pages), directed edges (wikilinks), and the set of types.

    Args:
        wiki_dir: Wiki root. Each name in ``PAGE_CONTENT_DIRS`` is looked up as a
            sub-directory; missing sub-directories are skipped and only ``*.md``
            files directly inside an existing one become nodes.

    Returns:
        A dict with three keys:

        * ``"nodes"``: one dict per page with ``id`` (``"<dir>/<file stem>"``),
          ``label`` (file stem), ``type``, ``description``, ``sources`` and the
          edge counters ``out`` / ``in``.
        * ``"edges"``: ``{"source": id, "target": id}`` dicts, at most one per
          ordered pair of pages.
        * ``"types"``: the sorted, de-duplicated node types.
    """
    nodes: dict[str, dict] = {}
    texts: dict[str, str] = {}  # nid -> file text, read once and reused for edges
    for sub in PAGE_CONTENT_DIRS:
        d = wiki_dir / sub
        if not d.exists():
            continue
        for p in sorted(d.glob("*.md")):
            nid = f"{sub}/{p.stem}"
            text = p.read_text(encoding="utf-8")
            texts[nid] = text
            fm = frontmatter.parse(text)

            # A non-blank string `type` in the frontmatter wins; otherwise the
            # type is derived from the directory the page lives in.
            t = fm.get("type")
            t = t.strip() if isinstance(t, str) and t.strip() else _type_for_dir(sub)

            # Anything that is not a string is treated as "no description".
            desc = fm.get("description")
            desc = desc.strip() if isinstance(desc, str) else ""

            # Only a list-valued `sources` is used; its items are stringified.
            srcs = fm.get("sources")
            srcs = [str(s) for s in srcs] if isinstance(srcs, list) else []

            ft = fm.get("full_text")  # summaries record their origin document here, not in `sources`
            if isinstance(ft, str) and ft.strip():
                srcs.insert(0, ft.strip())

            nodes[nid] = {"id": nid, "label": p.stem, "type": t,
                          "description": desc, "sources": srcs, "out": 0, "in": 0}

    # Link targets and node ids are both passed through _normalize_target, and a
    # link only resolves when the normalised forms match.
    norm = {_normalize_target(nid): nid for nid in nodes}
    edges: list[dict] = []
    seen: set[tuple[str, str]] = set()
    for src, text in texts.items():
        for raw in _extract_wikilinks(text):
            tgt = norm.get(_normalize_target(raw))
            # Skip links to unknown pages, self-links, and repeats of an edge
            # that was already recorded for this (source, target) pair.
            if not tgt or tgt == src or (src, tgt) in seen:
                continue
            seen.add((src, tgt))
            edges.append({"source": src, "target": tgt})
            nodes[src]["out"] += 1
            nodes[tgt]["in"] += 1

    types = sorted({n["type"] for n in nodes.values()})
    return {"nodes": list(nodes.values()), "edges": edges, "types": types}


def _script_safe_json(graph: dict) -> str:
    """Serialise *graph* to JSON that is safe to place inside an inline <script>.

    Two sequences are neutralised, both with escapes that decode back to the
    original characters, so the embedded data is unchanged once parsed:

    * ``</`` becomes ``<\\/`` so page text containing ``</script>`` cannot end
      the script element early.
    * ``<!--`` becomes ``\\u003c!--`` because, per the HTML Standard's rules for
      script content, ``<!--`` followed later by ``<script`` makes the parser
      ignore the next ``</script>``, which would swallow the real closing tag.
    """
    data = json.dumps(graph, ensure_ascii=False)
    return data.replace("</", "<\\/").replace("<!--", "\\u003c!--")


def render_html(graph: dict) -> str:
    """Inject the graph as JSON into the self-contained HTML template.

    Args:
        graph: A JSON-serialisable mapping, e.g. the result of ``build_graph``.

    Returns:
        The text of the packaged ``templates/graph.html`` with every
        ``__GRAPH_DATA__`` placeholder replaced by the script-safe JSON.
    """
    template = resources.files("openkb").joinpath("templates/graph.html").read_text(encoding="utf-8")
    return template.replace("__GRAPH_DATA__", _script_safe_json(graph))
from openkb.agent.skill_runner import SkillRunResult fake_run_result = SkillRunResult( - skill_name="openkb-deck-editorial", + skill_name="openkb-deck-neon", output_path=gen.output_dir / "index.html", validation=DeckValidationResult(), metadata={"mode": "deck"}, @@ -102,7 +102,7 @@ async def test_generator_deck_dispatches_to_deck_creator(tmp_path): intent="…", model="openai/gpt-4o", critique=False, - skill_name="openkb-deck-editorial", + skill_name="openkb-deck-neon", ) regen.assert_not_called() # marketplace is skill-only assert result == gen.output_dir diff --git a/tests/test_visualize.py b/tests/test_visualize.py new file mode 100644 index 00000000..3d7f2016 --- /dev/null +++ b/tests/test_visualize.py @@ -0,0 +1,57 @@ +from pathlib import Path + +from openkb.visualize import build_graph, render_html + + +def _wiki(tmp_path: Path) -> Path: + wiki = tmp_path / "wiki" + for sub in ("summaries", "concepts", "entities", "reports", "sources"): + (wiki / sub).mkdir(parents=True) + (wiki / "index.md").write_text("# Index\n", encoding="utf-8") + return wiki + + +def test_build_graph_nodes_edges_types(tmp_path): + wiki = _wiki(tmp_path) + (wiki / "summaries" / "paper.md").write_text( + '---\ntype: "Summary"\ndescription: "A paper."\nfull_text: "sources/paper.json"\n---\n\n' + "Discusses [[concepts/attention]] and [[entities/anthropic]].\n", encoding="utf-8") + (wiki / "concepts" / "attention.md").write_text( + '---\ntype: "Concept"\ndescription: "Focus."\nsources: ["summaries/paper"]\n---\n\n' + "Used by [[concepts/attention]] (self) and [[concepts/missing]] (broken).\n", encoding="utf-8") + (wiki / "entities" / "anthropic.md").write_text( + '---\ntype: "Organization"\ndescription: "AI lab."\n---\n\n' + "# Anthropic\n", encoding="utf-8") + (wiki / "concepts" / "orphan.md").write_text("# Orphan\n\nNo links.\n", encoding="utf-8") + + g = build_graph(wiki) + ids = {n["id"] for n in g["nodes"]} + assert ids == {"summaries/paper", "concepts/attention", "entities/anthropic", 
"concepts/orphan"} + by = {n["id"]: n for n in g["nodes"]} + assert by["concepts/orphan"]["type"] == "Concept" + assert by["entities/anthropic"]["type"] == "Organization" + edge_pairs = {(e["source"], e["target"]) for e in g["edges"]} + assert ("summaries/paper", "concepts/attention") in edge_pairs + assert ("summaries/paper", "entities/anthropic") in edge_pairs + assert not any(e["target"] == "concepts/missing" for e in g["edges"]) + assert not any(e["source"] == e["target"] for e in g["edges"]) + assert by["concepts/attention"]["in"] == 1 and by["summaries/paper"]["out"] == 2 + assert g["types"] == ["Concept", "Organization", "Summary"] + # sources: concepts use the `sources` field; summaries fall back to `full_text` (the origin doc) + assert by["concepts/attention"]["sources"] == ["summaries/paper"] + assert by["summaries/paper"]["sources"] == ["sources/paper.json"] + + +def test_build_graph_empty_wiki(tmp_path): + assert build_graph(_wiki(tmp_path)) == {"nodes": [], "edges": [], "types": []} + + +def test_render_html_self_contained(): + g = {"nodes":[{"id":"concepts/a","label":"a","type":"Concept","description":"x—y","sources":[],"out":0,"in":0}], + "edges":[], "types":["Concept"]} + html = render_html(g) + assert " Path: + for sub in ("summaries", "concepts", "entities"): + (tmp_path / "wiki" / sub).mkdir(parents=True) + (tmp_path / ".openkb").mkdir() + (tmp_path / ".openkb" / "config.yaml").write_text("model: gpt-4o-mini\n", encoding="utf-8") + (tmp_path / "wiki" / "concepts" / "a.md").write_text( + '---\ntype: "Concept"\ndescription: "d"\n---\n\nlinks [[concepts/b]]\n', encoding="utf-8") + (tmp_path / "wiki" / "concepts" / "b.md").write_text( + '---\ntype: "Concept"\ndescription: "d2"\n---\n\n# B\n', encoding="utf-8") + return tmp_path + + +def test_visualize_writes_html_and_opens_by_default(tmp_path): + kb = _kb(tmp_path) + with patch("openkb.cli._find_kb_dir", return_value=kb), \ + patch("webbrowser.open") as wb: + result = CliRunner().invoke(cli, 
["visualize"]) + assert result.exit_code == 0, result.output + out = kb / "output" / "visualize" / "graph.html" + assert out.exists() + html = out.read_text(encoding="utf-8") + assert "