diff --git a/src/unstract/clone/context.py b/src/unstract/clone/context.py index 8441748..238e837 100644 --- a/src/unstract/clone/context.py +++ b/src/unstract/clone/context.py @@ -15,6 +15,7 @@ import threading from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any +from uuid import NAMESPACE_URL, uuid5 if TYPE_CHECKING: from unstract.clone.client import PlatformClient @@ -74,10 +75,30 @@ class RemapTable: def __init__(self) -> None: self._table: dict[str, dict[str, str]] = {} + # Synthetic target ids minted by record_planned() in dry-run. Tracked + # so the report can mask them — they are not real target ids. + self._planned: set[str] = set() def record(self, entity: str, src_uuid: str, tgt_uuid: str) -> None: self._table.setdefault(entity, {})[src_uuid] = tgt_uuid + def record_planned(self, entity: str, src_uuid: str) -> str: + """Dry-run only: record a deterministic synthetic target id so + dependent phases can resolve the FK and plan-count without writing. + Never reaches the wire, so the fake id stays in-memory scaffolding. + """ + tgt_uuid = str(uuid5(NAMESPACE_URL, f"planned:{entity}:{src_uuid}")) + self.record(entity, src_uuid, tgt_uuid) + self._planned.add(tgt_uuid) + return tgt_uuid + + def is_planned(self, tgt_uuid: str) -> bool: + """True if ``tgt_uuid`` is a dry-run synthetic id (no real row on + target). Callers use this to skip live target lookups that would + query a non-existent id. + """ + return tgt_uuid in self._planned + def resolve(self, entity: str, src_uuid: str) -> str | None: return self._table.get(entity, {}).get(src_uuid) @@ -90,9 +111,19 @@ def resolve_any(self, src_uuid: str) -> str | None: return hit return None - def snapshot(self) -> dict[str, dict[str, str]]: - """Read-only snapshot for the post-run report.""" - return {entity: dict(m) for entity, m in self._table.items()} + def snapshot(self, *, hide_planned: bool = False) -> dict[str, dict[str, str]]: + """Read-only snapshot for the post-run report. ``hide_planned`` masks + dry-run synthetic ids (rendered as ``"(planned)"``) while keeping the + per-entity counts intact. + """ + + def _val(tgt: str) -> str: + return "(planned)" if hide_planned and tgt in self._planned else tgt + + return { + entity: {src: _val(tgt) for src, tgt in m.items()} + for entity, m in self._table.items() + } @dataclass diff --git a/src/unstract/clone/orchestrator.py b/src/unstract/clone/orchestrator.py index 9fbcd04..85df6ea 100644 --- a/src/unstract/clone/orchestrator.py +++ b/src/unstract/clone/orchestrator.py @@ -84,6 +84,7 @@ def clone( target=Endpoint( base_url=target.base_url, organization_id=target.organization_id ), + dry_run=opts.dry_run, ) run_started = time.perf_counter() @@ -112,7 +113,7 @@ def clone( ) report.total_duration_s = time.perf_counter() - run_started - report.remap_snapshot = ctx.remap.snapshot() + report.remap_snapshot = ctx.remap.snapshot(hide_planned=opts.dry_run) return report finally: src_client.close() diff --git a/src/unstract/clone/phases/adapter.py b/src/unstract/clone/phases/adapter.py index 28082c5..873fa13 100644 --- a/src/unstract/clone/phases/adapter.py +++ b/src/unstract/clone/phases/adapter.py @@ -97,7 +97,8 @@ def _clone_one( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("adapter", src_id) logger.info( "[dry-run] would create adapter '%s' [%s] src=%s", name, atype, src_id ) diff --git a/src/unstract/clone/phases/api_deployment.py b/src/unstract/clone/phases/api_deployment.py index b707045..ee1f65a 100644 --- a/src/unstract/clone/phases/api_deployment.py +++ b/src/unstract/clone/phases/api_deployment.py @@ -112,7 +112,8 @@ def _clone_one( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("api_deployment", src_id) logger.info( "[dry-run] would create api_deployment '%s' src=%s", api_name, src_id ) diff --git a/src/unstract/clone/phases/connector.py b/src/unstract/clone/phases/connector.py index 3243542..9f9c8ca 100644 --- a/src/unstract/clone/phases/connector.py +++ b/src/unstract/clone/phases/connector.py @@ -134,7 +134,8 @@ def _clone_one( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("connector", src_id) logger.info("[dry-run] would create connector '%s' src=%s", name, src_id) return else: diff --git a/src/unstract/clone/phases/custom_tool.py b/src/unstract/clone/phases/custom_tool.py index 53c2936..372c3fd 100644 --- a/src/unstract/clone/phases/custom_tool.py +++ b/src/unstract/clone/phases/custom_tool.py @@ -145,12 +145,12 @@ def _clone_one( "tool_name": tool_name, } + # Each sub-path (adopt / fresh / fresh-dry-run) owns its own + # custom_tool remap, since only it knows whether the target id is + # real or a planned synthetic. if tgt_tool_id is None: return - with lock: - self.ctx.remap.record("custom_tool", src_tool_id, tgt_tool_id) - # Neither the export blob nor list rows carry share axes — # share state comes from the source detail. self.apply_share( @@ -163,6 +163,10 @@ def _clone_one( ) if self.ctx.options.dry_run: + # Can't republish the registry without writing, but ToolInstance + # needs a prompt_studio_registry remap to plan-count. Mirror it + # with a planned id derived from the source registry (read-only). + self._record_planned_registry(src_tool_id, tool_name, lock) return # Tools never exported on source (e.g. empty projects — backend @@ -223,6 +227,30 @@ def _clone_one( tgt_regs[0]["prompt_registry_id"], ) + def _record_planned_registry( + self, src_tool_id: str, tool_name: str, lock: threading.Lock + ) -> None: + """Dry-run: record a planned prompt_studio_registry remap from the + source registry id, so ToolInstancePhase can resolve tool_id and + plan-count. No-op for tools never exported on source (no registry). + """ + try: + src_regs = self.ctx.source.list_registries(custom_tool=src_tool_id) + except Exception as e: + logger.warning( + "[dry-run] source registry lookup failed for tool '%s' " + "(tool_instance plan may under-count): %s", + tool_name, + e, + ) + return + if not src_regs: + return + with lock: + self.ctx.remap.record_planned( + "prompt_studio_registry", src_regs[0]["prompt_registry_id"] + ) + def _adopt( self, match: dict[str, Any], @@ -240,7 +268,8 @@ def _adopt( tgt_tool_id = match["tool_id"] if self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.adopted += 1 + self.ctx.remap.record("custom_tool", src_tool_id, tgt_tool_id) logger.info( "[dry-run] would sync prompts into adopted tool '%s' src=%s -> tgt=%s", tool_name, @@ -260,6 +289,7 @@ def _adopt( with lock: result.adopted += 1 + self.ctx.remap.record("custom_tool", src_tool_id, tgt_tool_id) logger.info( "adopted tool '%s' src=%s -> tgt=%s (prompts re-synced)", tool_name, @@ -276,14 +306,9 @@ def _create_fresh( result: PhaseResult, lock: threading.Lock, ) -> str | None: - if self.ctx.options.dry_run: - with lock: - result.skipped += 1 - logger.info( - "[dry-run] would import tool '%s' src=%s", tool_name, src_tool_id - ) - return None - + # Run the source-side validations even in dry-run — they decide + # whether a real run would create or frictionless-skip, so the plan + # counts must reflect them. Only the target-write steps are stubbed. default_profile = self._source_default_profile(src_tool_id, tool_name) if default_profile is None: with lock: @@ -300,6 +325,18 @@ def _create_fresh( ) return None + if self.ctx.options.dry_run: + # Target adapter resolution is skipped: adapters this run would + # create don't exist on target yet, so it can't resolve. The + # frictionless check above already caught the real skip cases. + with lock: + result.created += 1 + tgt_tool_id = self.ctx.remap.record_planned("custom_tool", src_tool_id) + logger.info( + "[dry-run] would import tool '%s' src=%s", tool_name, src_tool_id + ) + return tgt_tool_id + adapter_ids = self._resolve_target_adapter_ids(default_profile, tool_name) if adapter_ids is None: with lock: @@ -321,6 +358,7 @@ def _create_fresh( tgt_tool_id = tgt["tool_id"] with lock: result.created += 1 + self.ctx.remap.record("custom_tool", src_tool_id, tgt_tool_id) logger.info( "created tool '%s' src=%s -> tgt=%s (needs_adapter_config=%s)", tool_name, diff --git a/src/unstract/clone/phases/files.py b/src/unstract/clone/phases/files.py index a241fc1..6e57571 100644 --- a/src/unstract/clone/phases/files.py +++ b/src/unstract/clone/phases/files.py @@ -126,18 +126,23 @@ def _build_tool_tasks( report: CloneReport, result: PhaseResult, ) -> list[_FileTask]: - try: - tgt_docs = self.ctx.target.list_prompt_documents(tgt_tool_id) - except Exception as e: - logger.exception( - "files: failed to list target DM rows for tool %s: %s", - tool_name, - e, - ) - result.failed += 1 - result.errors.append(f"list target docs {tool_name}: {e}") - return [] - target_names = {d["document_name"] for d in tgt_docs} + # A planned (dry-run) tool id has no row on target; skip the live + # lookup and treat every source file as missing → predicted upload. + if self.ctx.options.dry_run and self.ctx.remap.is_planned(tgt_tool_id): + target_names: set[str] = set() + else: + try: + tgt_docs = self.ctx.target.list_prompt_documents(tgt_tool_id) + except Exception as e: + logger.exception( + "files: failed to list target DM rows for tool %s: %s", + tool_name, + e, + ) + result.failed += 1 + result.errors.append(f"list target docs {tool_name}: {e}") + return [] + target_names = {d["document_name"] for d in tgt_docs} tasks: list[_FileTask] = [] for doc in src_docs: @@ -163,7 +168,7 @@ def _build_tool_tasks( ) continue if self.ctx.options.dry_run: - result.skipped += 1 + result.created += 1 logger.info( "[dry-run] files: would clone tool=%s file=%s", tool_name, diff --git a/src/unstract/clone/phases/group.py b/src/unstract/clone/phases/group.py index 1b54f7b..dac0b1d 100644 --- a/src/unstract/clone/phases/group.py +++ b/src/unstract/clone/phases/group.py @@ -75,7 +75,8 @@ def _clone_one( logger.info("reusing group '%s' src=%s -> tgt=%s", name, src_id, tgt["id"]) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("group", str(src_id)) logger.info("[dry-run] would create group '%s' src=%s", name, src_id) if self.ctx.options.clone_group_members: # Still computed so would-skip members show up in the report. diff --git a/src/unstract/clone/phases/pipeline.py b/src/unstract/clone/phases/pipeline.py index 121f3a4..f1a93ea 100644 --- a/src/unstract/clone/phases/pipeline.py +++ b/src/unstract/clone/phases/pipeline.py @@ -116,7 +116,8 @@ def _clone_one( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("pipeline", src_id) logger.info("[dry-run] would create pipeline '%s' src=%s", name, src_id) return else: diff --git a/src/unstract/clone/phases/tag.py b/src/unstract/clone/phases/tag.py index 9cbca05..fbf8835 100644 --- a/src/unstract/clone/phases/tag.py +++ b/src/unstract/clone/phases/tag.py @@ -76,7 +76,8 @@ def _clone_one( logger.info("adopted tag '%s' src=%s -> tgt=%s", name, src_id, tgt["id"]) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("tag", src_id) logger.info("[dry-run] would create tag '%s' src=%s", name, src_id) return else: diff --git a/src/unstract/clone/phases/tool_instance.py b/src/unstract/clone/phases/tool_instance.py index 4b80383..3234189 100644 --- a/src/unstract/clone/phases/tool_instance.py +++ b/src/unstract/clone/phases/tool_instance.py @@ -125,25 +125,31 @@ def _clone_workflow_tools( result.skipped += 1 return - try: - existing = self.ctx.target.list_tool_instances(workflow_id=tgt_wf_id) - except Exception as e: - logger.exception( - "Failed to list target tool_instances for wf %s: %s", tgt_wf_id, e - ) - with lock: - result.failed += 1 - result.errors.append(f"list tgt tool_instances {tgt_wf_id}: {e}") - return + # A planned (dry-run) workflow id has no row on target; its + # tool_instance can't exist yet, so predict a create without the + # live lookup against a non-existent id. + if self.ctx.options.dry_run and self.ctx.remap.is_planned(tgt_wf_id): + existing: list[dict[str, Any]] = [] + else: + try: + existing = self.ctx.target.list_tool_instances(workflow_id=tgt_wf_id) + except Exception as e: + logger.exception( + "Failed to list target tool_instances for wf %s: %s", tgt_wf_id, e + ) + with lock: + result.failed += 1 + result.errors.append(f"list tgt tool_instances {tgt_wf_id}: {e}") + return if existing: tgt_ti = existing[0] if self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.adopted += 1 self.ctx.remap.record("tool_instance", src_ti_id, tgt_ti["id"]) logger.info( - "[dry-run] would re-PATCH metadata on adopted tool_instance " + "[dry-run] would adopt tool_instance (metadata PATCH skipped) " "src=%s -> tgt=%s (workflow %s)", src_ti_id, tgt_ti["id"], @@ -160,7 +166,8 @@ def _clone_workflow_tools( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("tool_instance", src_ti_id) logger.info( "[dry-run] would create tool_instance for tgt workflow %s " "(src tool_instance %s)", diff --git a/src/unstract/clone/phases/workflow.py b/src/unstract/clone/phases/workflow.py index 7e3d3f7..d9256a1 100644 --- a/src/unstract/clone/phases/workflow.py +++ b/src/unstract/clone/phases/workflow.py @@ -124,7 +124,8 @@ def _clone_one( ) elif self.ctx.options.dry_run: with lock: - result.skipped += 1 + result.created += 1 + self.ctx.remap.record_planned("workflow", src_id) logger.info("[dry-run] would create workflow '%s' src=%s", name, src_id) return else: diff --git a/src/unstract/clone/phases/workflow_endpoint.py b/src/unstract/clone/phases/workflow_endpoint.py index a9ffa7a..1101ec3 100644 --- a/src/unstract/clone/phases/workflow_endpoint.py +++ b/src/unstract/clone/phases/workflow_endpoint.py @@ -80,6 +80,23 @@ def _clone_workflow_endpoints( result.errors.append(f"list src endpoints {src_wf_id}: {e}") return + # A planned (dry-run) workflow would be freshly created, and the + # backend auto-creates its SOURCE/DEST endpoints on create. They + # don't exist on target yet, so predict a patch per source endpoint + # without the live lookup against the synthetic id. + if self.ctx.options.dry_run and self.ctx.remap.is_planned(tgt_wf_id): + for src_ep in src_endpoints: + with lock: + result.created += 1 + self.ctx.remap.record_planned("workflow_endpoint", src_ep["id"]) + logger.info( + "[dry-run] would PATCH %s endpoint on new workflow %s (src=%s)", + src_ep["endpoint_type"], + tgt_wf_id, + src_ep["id"], + ) + return + try: tgt_endpoints = self.ctx.target.list_workflow_endpoints( workflow_id=tgt_wf_id @@ -124,17 +141,9 @@ def _patch_endpoint( tgt_ep_id = tgt_ep["id"] etype = src_ep["endpoint_type"] - if self.ctx.options.dry_run: - with lock: - result.skipped += 1 - logger.info( - "[dry-run] would PATCH %s endpoint src=%s -> tgt=%s", - etype, - src_ep_id, - tgt_ep_id, - ) - return - + # Resolve the connector before the dry-run gate so the plan mirrors + # the real run's unmapped-connector skip (an unmapped connector is + # left out of both counts, not predicted as a patch). src_conn_id = _extract_connector_id(src_ep) tgt_conn_id: str | None = None if src_conn_id: @@ -157,6 +166,19 @@ def _patch_endpoint( ) return + if self.ctx.options.dry_run: + with lock: + result.created += 1 + self.ctx.remap.record_planned("workflow_endpoint", src_ep_id) + logger.info( + "[dry-run] would PATCH %s endpoint src=%s -> tgt=%s (connector %s)", + etype, + src_ep_id, + tgt_ep_id, + tgt_conn_id, + ) + return + payload: dict[str, Any] = { "configuration": remap_uuids( src_ep.get("configuration") or {}, self.ctx.remap diff --git a/src/unstract/clone/report.py b/src/unstract/clone/report.py index bfc52ea..1066585 100644 --- a/src/unstract/clone/report.py +++ b/src/unstract/clone/report.py @@ -40,6 +40,7 @@ class Endpoint: class CloneReport: source: Endpoint | None = None target: Endpoint | None = None + dry_run: bool = False phases: list[PhaseResult] = field(default_factory=list) skipped_phases: list[str] = field(default_factory=list) remap_snapshot: dict[str, dict[str, str]] = field(default_factory=dict) @@ -82,6 +83,11 @@ def render(self) -> str: self._render_failures_summary(console_print=console.print, rich=True) self._render_warnings_summary(console_print=console.print, rich=True) self._render_endpoints(console.print) + if self.dry_run: + console.print( + "[bold yellow]DRY RUN[/bold yellow] — nothing written; " + "counts are the predicted real run" + ) table = Table(title="Clone Report", header_style="bold cyan") table.add_column("Phase", style="bold", justify="left") for col in ("Created", "Adopted", "Skipped", "Failed", "Time"): @@ -162,6 +168,8 @@ def _render_plain(self) -> str: self._render_warnings_summary(console_print=lines.append, rich=False) lines.extend(["Clone Report", "=" * 60]) self._render_endpoints(lines.append) + if self.dry_run: + lines.append("DRY RUN — nothing written; counts are the predicted real run") header = ( f"{'Phase':<24}{'Created':>10}{'Adopted':>10}" f"{'Skipped':>10}{'Failed':>10}{'Time':>10}" @@ -201,6 +209,7 @@ def as_dict(self) -> dict[str, Any]: if self.target else None ), + "dry_run": self.dry_run, "phases": [ { "name": p.name, diff --git a/tests/clone/test_adapter_phase.py b/tests/clone/test_adapter_phase.py index d2b0311..40b4a0e 100644 --- a/tests/clone/test_adapter_phase.py +++ b/tests/clone/test_adapter_phase.py @@ -138,9 +138,13 @@ def test_dry_run_makes_no_posts(): result = AdapterPhase(ctx).run(report) - assert result.skipped == 1 - assert result.created == 0 + # Dry-run predicts the create (count matches a real run) but writes nothing + # and records a synthetic remap so dependent phases can plan. + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("adapter", "src-a") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises(): diff --git a/tests/clone/test_api_deployment_phase.py b/tests/clone/test_api_deployment_phase.py index dc25d7a..ea6cd89 100644 --- a/tests/clone/test_api_deployment_phase.py +++ b/tests/clone/test_api_deployment_phase.py @@ -149,8 +149,11 @@ def test_dry_run_makes_no_writes(): result = APIDeploymentPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("api_deployment", "src-dep-1") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises(): diff --git a/tests/clone/test_connector_phase.py b/tests/clone/test_connector_phase.py index 28f17f2..78de5f2 100644 --- a/tests/clone/test_connector_phase.py +++ b/tests/clone/test_connector_phase.py @@ -175,9 +175,11 @@ def test_dry_run_makes_no_posts(): result = ConnectorPhase(ctx).run(report) - assert result.skipped == 1 - assert result.created == 0 + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("connector", "src-a") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises(): diff --git a/tests/clone/test_custom_tool_phase.py b/tests/clone/test_custom_tool_phase.py index 5c6746e..98d56a4 100644 --- a/tests/clone/test_custom_tool_phase.py +++ b/tests/clone/test_custom_tool_phase.py @@ -306,15 +306,21 @@ def test_dry_run_makes_no_writes(): src = FakeClient() tgt = FakeClient() _preload_source_tool(src, "src-tool-x", "T") + _seed_source_adapters(src) _seed_target_adapters(tgt) ctx = _ctx(src, tgt, dry_run=True) result = CustomToolPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + # Predicts the import (count matches a real run) without writing, and + # records a planned custom_tool remap so the files phase can plan. + assert result.created == 1 + assert result.skipped == 0 assert tgt.import_calls == [] assert tgt.sync_calls == [] assert tgt.export_tool_calls == [] + planned = ctx.remap.resolve("custom_tool", "src-tool-x") + assert planned is not None and ctx.remap.is_planned(planned) def test_dry_run_on_adopt_path_does_not_republish_registry(): @@ -329,7 +335,9 @@ def test_dry_run_on_adopt_path_does_not_republish_registry(): result = CustomToolPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + # Adopt path counts as adopted (matching a real run). + assert result.adopted == 1 + assert result.skipped == 0 assert tgt.sync_calls == [] assert tgt.import_calls == [] # Critical regression: registry republish must NOT fire on dry-run. diff --git a/tests/clone/test_files_phase.py b/tests/clone/test_files_phase.py index 50f739e..f444510 100644 --- a/tests/clone/test_files_phase.py +++ b/tests/clone/test_files_phase.py @@ -327,8 +327,9 @@ def test_dry_run_makes_no_writes_even_for_missing_files(): result = FilesPhase(ctx).run(report) - assert result.skipped == 1 - assert result.created == 0 + # Predicts the upload (count matches a real run) without touching the wire. + assert result.created == 1 + assert result.skipped == 0 assert tgt.uploaded == [] assert src.download_calls == [] diff --git a/tests/clone/test_group_phase.py b/tests/clone/test_group_phase.py index 9ef1490..8ce3d0c 100644 --- a/tests/clone/test_group_phase.py +++ b/tests/clone/test_group_phase.py @@ -102,9 +102,11 @@ def test_dry_run_makes_no_posts(): result = GroupPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + assert result.created == 1 + assert result.skipped == 0 assert tgt.group_posts == [] - assert ctx.remap.resolve("group", "1") is None + planned = ctx.remap.resolve("group", "1") + assert planned is not None and ctx.remap.is_planned(planned) def test_members_not_cloned_by_default(): diff --git a/tests/clone/test_pipeline_phase.py b/tests/clone/test_pipeline_phase.py index e69c3cb..f1c09fc 100644 --- a/tests/clone/test_pipeline_phase.py +++ b/tests/clone/test_pipeline_phase.py @@ -230,8 +230,11 @@ def test_dry_run_makes_no_writes(): result = PipelinePhase(ctx).run(CloneReport()) - assert result.skipped == 1 + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("pipeline", "src-pl-1") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises(): diff --git a/tests/clone/test_remap_table.py b/tests/clone/test_remap_table.py index 6a13754..f74dffc 100644 --- a/tests/clone/test_remap_table.py +++ b/tests/clone/test_remap_table.py @@ -34,3 +34,35 @@ def test_snapshot_is_independent_copy(): snap = t.snapshot() t.record("adapter", "src-2", "tgt-2") assert "src-2" not in snap["adapter"] + + +def test_record_planned_is_deterministic_and_resolvable(): + t1, t2 = RemapTable(), RemapTable() + a = t1.record_planned("workflow", "src-w") + b = t2.record_planned("workflow", "src-w") + # Same (entity, src) → same synthetic id across runs (resume-safe). + assert a == b + assert t1.resolve("workflow", "src-w") == a + assert t1.is_planned(a) + # Different src or entity → different id. + assert t1.record_planned("workflow", "src-other") != a + assert t1.record_planned("pipeline", "src-w") != a + + +def test_is_planned_false_for_real_ids(): + t = RemapTable() + t.record("adapter", "src-1", "tgt-1") + assert not t.is_planned("tgt-1") + + +def test_snapshot_hide_planned_masks_only_synthetic_ids(): + t = RemapTable() + t.record("adapter", "src-real", "tgt-real") + t.record_planned("workflow", "src-planned") + + visible = t.snapshot() + assert visible["workflow"]["src-planned"] != "(planned)" + + masked = t.snapshot(hide_planned=True) + assert masked["adapter"]["src-real"] == "tgt-real" + assert masked["workflow"]["src-planned"] == "(planned)" diff --git a/tests/clone/test_tag_phase.py b/tests/clone/test_tag_phase.py index f6086a9..bcd5aad 100644 --- a/tests/clone/test_tag_phase.py +++ b/tests/clone/test_tag_phase.py @@ -94,9 +94,11 @@ def test_dry_run_makes_no_posts(): result = TagPhase(ctx).run(report) - assert result.skipped == 1 - assert result.created == 0 + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("tag", "src-a") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises(): diff --git a/tests/clone/test_tool_instance_phase.py b/tests/clone/test_tool_instance_phase.py index 180c285..a161ab4 100644 --- a/tests/clone/test_tool_instance_phase.py +++ b/tests/clone/test_tool_instance_phase.py @@ -207,7 +207,7 @@ def test_broken_adapter_refs_bumps_skipped_and_records_error(): assert any("stale adapter refs" in e for e in result.errors) -def test_dry_run_does_not_create_or_patch(): +def test_dry_run_predicts_create_without_writing(): src = FakeClient() src.instances[SRC_WF] = [_src_ti("src-ti-1", SRC_WF, SRC_REG, {"x": 1})] tgt = FakeClient() @@ -215,7 +215,30 @@ def test_dry_run_does_not_create_or_patch(): result = ToolInstancePhase(ctx).run(CloneReport()) - assert result.skipped == 1 + assert result.created == 1 + assert result.skipped == 0 + assert tgt.create_calls == [] + assert tgt.patch_calls == [] + planned = ctx.remap.resolve("tool_instance", "src-ti-1") + assert planned is not None and ctx.remap.is_planned(planned) + + +def test_dry_run_planned_workflow_predicts_create(): + # Parent workflow would be freshly created (planned id). The tool_instance + # can't exist on target yet, so the phase must predict a create off the + # planned workflow rather than no-op. + src = FakeClient() + src.instances[SRC_WF] = [_src_ti("src-ti-1", SRC_WF, SRC_REG, {"x": 1})] + tgt = FakeClient() + remap = RemapTable() + remap.record_planned("workflow", SRC_WF) + remap.record_planned("prompt_studio_registry", SRC_REG) + ctx = _ctx(src, tgt, remap=remap, dry_run=True) + + result = ToolInstancePhase(ctx).run(CloneReport()) + + assert result.created == 1 + assert result.skipped == 0 assert tgt.create_calls == [] assert tgt.patch_calls == [] @@ -240,9 +263,10 @@ def test_dry_run_on_adopt_path_does_not_repatch_target(): result = ToolInstancePhase(ctx).run(CloneReport()) - assert result.skipped == 1 - assert result.adopted == 0 + # Adopt path: counts as adopted (matching a real run), metadata PATCH + # skipped, real target remap recorded. + assert result.adopted == 1 + assert result.skipped == 0 assert tgt.create_calls == [] assert tgt.patch_calls == [] - # Remap still gets recorded so downstream dry-run output is coherent. assert ctx.remap.resolve("tool_instance", "src-ti-1") == "tgt-pre-ti" diff --git a/tests/clone/test_workflow_endpoint_phase.py b/tests/clone/test_workflow_endpoint_phase.py index 811488f..6ec8072 100644 --- a/tests/clone/test_workflow_endpoint_phase.py +++ b/tests/clone/test_workflow_endpoint_phase.py @@ -227,8 +227,36 @@ def test_dry_run_makes_no_patches(): result = WorkflowEndpointPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + # Endpoints count as `created` on a real PATCH; dry-run predicts that + # without writing, and records a planned remap. + assert result.created == 1 + assert result.skipped == 0 + assert tgt.patch_calls == [] + planned = ctx.remap.resolve("workflow_endpoint", "src-ep-source") + assert planned is not None and ctx.remap.is_planned(planned) + + +def test_dry_run_planned_workflow_predicts_patch_without_target_lookup(): + # Parent workflow would be freshly created (planned id), so its endpoints + # don't exist on target yet. The phase must predict a patch per source + # endpoint, not fail with "missing tgt endpoint". + src = FakeClient() + src.endpoints[SRC_WF] = [ + _src_endpoint("src-ep-source", "SOURCE", SRC_CONN, {}), + _src_endpoint("src-ep-dest", "DESTINATION", SRC_CONN, {}), + ] + tgt = FakeClient() # no endpoints for the planned workflow + remap = RemapTable() + planned_wf = remap.record_planned("workflow", SRC_WF) + remap.record("connector", SRC_CONN, TGT_CONN) + ctx = _ctx(src, tgt, remap=remap, dry_run=True) + + result = WorkflowEndpointPhase(ctx).run(CloneReport()) + + assert result.created == 2 + assert result.failed == 0 assert tgt.patch_calls == [] + assert remap.is_planned(planned_wf) def test_no_workflows_in_remap_is_noop(): diff --git a/tests/clone/test_workflow_phase.py b/tests/clone/test_workflow_phase.py index b29aca8..e12bb3a 100644 --- a/tests/clone/test_workflow_phase.py +++ b/tests/clone/test_workflow_phase.py @@ -145,15 +145,20 @@ def test_idempotent_rerun_adopts_existing_workflow(): assert ctx.remap.resolve("workflow", "wf-src-1") == "wf-tgt-pre" -def test_dry_run_creates_nothing(): +def test_dry_run_predicts_create_without_writing(): src = FakeClient([_src("wf-src-1", "Invoice ETL")]) tgt = FakeClient() ctx = _ctx(src, tgt, dry_run=True) result = WorkflowPhase(ctx).run(CloneReport()) - assert result.skipped == 1 + # Count matches a real run; nothing posted; planned remap recorded so + # downstream phases (tool_instance, endpoint, pipeline) can plan. + assert result.created == 1 + assert result.skipped == 0 assert tgt.posts == [] + planned = ctx.remap.resolve("workflow", "wf-src-1") + assert planned is not None and ctx.remap.is_planned(planned) def test_abort_on_name_conflict_raises():