From d1a0384e8c5667b4f375caba5ff283212e23f5de Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Sun, 28 Jun 2026 08:05:18 +0530 Subject: [PATCH 1/2] feat(governance): in-runtime policy evaluator + guardrail compensation Rebase of the guardrail-compensation series onto feat/governance-audit's tip. Brings up the native governance layer in one squash: - In-runtime policy evaluator (native/evaluator.py): rule + check + condition matching with VADER sentiment / chardet / regex / entropy / incident / commitment operators. Honors per-check action overrides and cross-rule aggregation. Instance-scoped with explicit deps (AuditManager + GuardrailCompensator) injected by the host. - Native package exports (native/__init__.py): build_policy_index_from_yaml + GovernanceEvaluator + GuardrailCompensator + CheckContext + PolicyIndex. - GuardrailCompensator (native/guardrail_compensation.py): bounded ThreadPoolExecutor + BoundedSemaphore per runtime, contextvars propagation, weakref-tracked process-level atexit. Delegates HTTP / auth / URL / trace correlation to the injected GovernanceCompensationProvider. - Drop PolicyLoader: host fetches policy asynchronously via GovernancePolicyProvider and hands the resolved PolicyIndex to UiPathGovernedRuntime at construction. - Trace correlation: AuditEvent / AuditRecord no longer carry trace_id; OTel-backed sinks resolve from the live span via the AuditManager's captured contextvars snapshot. - testpypi dev pin (local dev only): uipath-core + uipath-platform pinned to the testpypi dev builds from PR UiPath/uipath-python#1761 (AdapterRegistry deletion + AuditRecord.trace_id field drop) via ``[tool.uv] override-dependencies`` + ``[tool.uv.sources]``. The wheel-baked ``[project.dependencies]`` constraint stays at the canonical ``uipath-core>=0.5.22,<0.6.0`` so consumer workspaces that don't configure testpypi (notably uipath-python's CI matrix) resolve cleanly against published versions. Tests: 346 passed + 1 skipped, ruff + mypy clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 3 + .../runtime/governance/_audit/traces.py | 2 +- .../runtime/governance/native/__init__.py | 45 + .../governance/native/_yaml_to_index.py | 11 +- .../runtime/governance/native/evaluator.py | 1102 +++++++++++++++++ .../native/guardrail_compensation.py | 311 +++++ .../runtime/governance/native/loader.py | 342 ----- src/uipath/runtime/governance/runtime.py | 224 ++-- tests/_helpers.py | 46 - tests/conftest.py | 8 +- tests/test_commitment_concern.py | 205 +++ tests/test_enforcement_mode_default.py | 114 -- tests/test_evaluator.py | 420 +++++++ tests/test_evaluator_operators.py | 672 ++++++++++ tests/test_governance_runtime.py | 193 +-- tests/test_guardrail_compensation.py | 503 ++++++++ tests/test_loader.py | 307 ----- tests/test_text_extraction.py | 307 +++++ uv.lock | 10 +- 19 files changed, 3793 insertions(+), 1032 deletions(-) create mode 100644 src/uipath/runtime/governance/native/__init__.py create mode 100644 src/uipath/runtime/governance/native/evaluator.py create mode 100644 src/uipath/runtime/governance/native/guardrail_compensation.py delete mode 100644 src/uipath/runtime/governance/native/loader.py delete mode 100644 tests/_helpers.py create mode 100644 tests/test_commitment_concern.py delete mode 100644 tests/test_enforcement_mode_default.py create mode 100644 tests/test_evaluator.py create mode 100644 tests/test_evaluator_operators.py create mode 100644 tests/test_guardrail_compensation.py delete mode 100644 tests/test_loader.py create mode 100644 tests/test_text_extraction.py diff --git a/pyproject.toml b/pyproject.toml index 48f7483..8d8792f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,9 @@ exclude-newer = "2 days" [tool.uv.exclude-newer-package] uipath-core = false +[tool.uv.sources] +uipath-core = { index = "testpypi" } + [[tool.uv.index]] name = "testpypi" url = "https://test.pypi.org/simple/" diff --git a/src/uipath/runtime/governance/_audit/traces.py 
b/src/uipath/runtime/governance/_audit/traces.py index abf1310..62db7d6 100644 --- a/src/uipath/runtime/governance/_audit/traces.py +++ b/src/uipath/runtime/governance/_audit/traces.py @@ -3,7 +3,7 @@ This sink creates OpenTelemetry spans for governance events. UiPath's OTel exporter (``uipath.tracing._otel_exporters.LlmOpsHttpExporter`` via ``_SpanUtils.otel_span_to_uipath_span``) is what ships them to the -Orchestrator Traces UI and is also what reads ``UIPATH_TRACE_ID``, +Orchestrator Traces UI and is also what reads ``UIPATH_ORGANIZATION_ID``, ``UIPATH_TENANT_ID``, ``UIPATH_FOLDER_KEY`` and ``UIPATH_JOB_KEY`` from the process environment and stamps them onto the outgoing ``UiPathSpan``. We intentionally do **not** duplicate that diff --git a/src/uipath/runtime/governance/native/__init__.py b/src/uipath/runtime/governance/native/__init__.py new file mode 100644 index 0000000..713a05d --- /dev/null +++ b/src/uipath/runtime/governance/native/__init__.py @@ -0,0 +1,45 @@ +"""Native UiPath governance policy evaluator. + +YAML-defined rules evaluated in-process at each agent lifecycle hook. +The host fetches the policy pack via the +:class:`GovernancePolicyProvider` protocol and compiles it into a +:class:`PolicyIndex` with :func:`build_policy_index_from_yaml` *before* +constructing :class:`GovernanceRuntime` — so the runtime layer never +performs I/O at construction time. + +This subpackage owns: + +- :class:`GovernanceEvaluator` – the evaluator implementation. +- :func:`build_policy_index_from_yaml` – pure YAML → :class:`PolicyIndex` + compiler. +- The native policy model: :class:`Rule`, :class:`Check`, + :class:`Condition`, :class:`PolicyIndex`. + +Shared output types (``Action``, ``AuditRecord``, …) live in +:mod:`uipath.core.governance`. 
+""" + +from ._yaml_to_index import build_policy_index_from_yaml +from .evaluator import GovernanceEvaluator +from .models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, + Severity, +) + +__all__ = [ + "GovernanceEvaluator", + "build_policy_index_from_yaml", + # Native policy model + "Check", + "CheckContext", + "Condition", + "PolicyIndex", + "PolicyPack", + "Rule", + "Severity", +] diff --git a/src/uipath/runtime/governance/native/_yaml_to_index.py b/src/uipath/runtime/governance/native/_yaml_to_index.py index 3bf264c..9abdec3 100644 --- a/src/uipath/runtime/governance/native/_yaml_to_index.py +++ b/src/uipath/runtime/governance/native/_yaml_to_index.py @@ -1,10 +1,11 @@ """Runtime YAML → PolicyIndex parser. -Mirrors the shape produced by ``packs/compile_packs.py`` but builds the -PolicyIndex directly from parsed YAML data rather than generating Python -source. Used by :mod:`uipath.runtime.governance.native.loader` to -compile the YAML body returned by the registered policy provider into -an in-memory index at startup. +Mirrors the shape produced by ``packs/compile_packs.py`` but builds +the :class:`PolicyIndex` directly from parsed YAML data rather than +generating Python source. The host calls this to compile the YAML +body returned by :meth:`GovernancePolicyProvider.get_policy_async` +into an in-memory index, then hands the index to +:class:`GovernanceRuntime`. Accepts either a single YAML document (one pack) or a multi-document stream (``---``-separated packs). Unknown check types and malformed diff --git a/src/uipath/runtime/governance/native/evaluator.py b/src/uipath/runtime/governance/native/evaluator.py new file mode 100644 index 0000000..f629902 --- /dev/null +++ b/src/uipath/runtime/governance/native/evaluator.py @@ -0,0 +1,1102 @@ +"""Governance rule evaluator. 
+ +Instance-scoped — every :class:`GovernanceRuntime` constructs its own +evaluator with explicit dependencies (audit manager, compensator, +enforcement mode). The evaluator does not reach across the runtime +layer through process-globals; the wiring layer composes the runtime +graph and the evaluator consumes what it's given. +""" + +from __future__ import annotations + +import logging +import math +import re +from collections import Counter +from datetime import datetime, timezone +from functools import lru_cache +from typing import Any + +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import ( + Action, + AuditRecord, + LifecycleHook, + RuleEvaluation, +) + +from uipath.runtime.governance._audit.base import AuditManager +from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, + disabled_guardrails, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + Rule, +) + +logger = logging.getLogger(__name__) + + +def _compensation_data_for_hook(context: CheckContext) -> dict[str, Any]: + """Build the ``data`` payload for the /runtime/govern compensating call. + + The server runs the guardrail check against the same content the + evaluator was looking at — so we forward whichever + :class:`CheckContext` field is populated for the active hook. Fields + not relevant to the hook are omitted to keep the payload tight. 
+ """ + if context.hook in (LifecycleHook.BEFORE_AGENT,): + return {"content": context.agent_input} + if context.hook in (LifecycleHook.AFTER_AGENT,): + return {"content": context.agent_output} + if context.hook in (LifecycleHook.BEFORE_MODEL,): + payload: dict[str, Any] = {"content": context.model_input} + if context.messages: + payload["messages"] = context.messages + return payload + if context.hook in (LifecycleHook.AFTER_MODEL,): + return {"content": context.model_output} + if context.hook in (LifecycleHook.TOOL_CALL,): + return {"tool_name": context.tool_name, "tool_args": context.tool_args} + if context.hook in (LifecycleHook.AFTER_TOOL,): + return {"tool_name": context.tool_name, "tool_result": context.tool_result} + # Memory-write and unknown hooks: pass an empty content so the + # server still receives a structurally-valid payload. + return {"content": ""} + + +@lru_cache(maxsize=256) +def _compile_regex(pattern: str) -> re.Pattern[str] | None: + """Compile and cache a regex pattern. + + Args: + pattern: The regex pattern string + + Returns: + Compiled pattern or None if invalid + """ + try: + return re.compile(pattern) + except re.error as e: + logger.warning("Invalid regex pattern '%s': %s", pattern, e) + return None + + +# --- vaderSentiment: lazy-imported singleton --- +# Hard dependency, but lazy-loaded to keep import-time cost off the +# critical path. The except branch is defence against a corrupted +# install (file present in METADATA but module unimportable) — the +# operator no-ops rather than crashing the agent. 
+_VADER_UNINITIALIZED = object() +_vader_analyzer: Any = _VADER_UNINITIALIZED + + +def _get_vader_analyzer() -> Any: + """Return a cached SentimentIntensityAnalyzer, or None if unavailable.""" + global _vader_analyzer + if _vader_analyzer is _VADER_UNINITIALIZED: + try: + from vaderSentiment.vaderSentiment import ( # type: ignore[import-untyped] + SentimentIntensityAnalyzer, + ) + + _vader_analyzer = SentimentIntensityAnalyzer() + except ImportError: + logger.error( + "vaderSentiment failed to import despite being a hard dependency; " + "sentiment_concern checks will not fire. Reinstall uipath-core." + ) + _vader_analyzer = None + return _vader_analyzer + + +# --- chardet: lazy-imported module for encoding integrity (A.7.4) --- +# Hard dependency, lazy-loaded for symmetry with the other library +# wrappers. The except branch covers corrupted installs only. +_CHARDET_UNINITIALIZED = object() +_chardet_module: Any = _CHARDET_UNINITIALIZED + + +def _get_chardet() -> Any: + """Return the chardet module, or None if unavailable.""" + global _chardet_module + if _chardet_module is _CHARDET_UNINITIALIZED: + try: + import chardet + + _chardet_module = chardet + except ImportError: + logger.error( + "chardet failed to import despite being a hard dependency; " + "encoding_concern confidence check will not fire (stdlib " + "signals still apply). Reinstall uipath-core." + ) + _chardet_module = None + return _chardet_module + + +# --- Static patterns for encoding_concern (A.7.4) --- +# Latin-1-as-UTF-8 mojibake bigrams — the visible artefacts when +# UTF-8-encoded text is re-decoded as Latin-1 / Windows-1252. +_MOJIBAKE_BIGRAMS: tuple[str, ...] 
= ( + "é", + "è", + "â", + "à ", + "ù", + "î", + "ô", + "ç", # accented vowels + "Ä", + "Ö", + "Ü", + "ß", # German umlauts / eszett + "’", + "“", + "â€\x9d", + "–", + "—", + "•", # smart quotes / dashes + "£", + "°", + "§", + "¶", + "©", + "®", # NBSP-leading symbols + "ï¿", + "¿½", # mojibake'd U+FFFD (0xEF 0xBF 0xBD as Latin-1) + "ï»", + "»¿", # mojibake'd BOM (0xEF 0xBB 0xBF as Latin-1) +) + +# Literal hex escape sequences ("\x80" as 4 source chars) indicate raw +# bytes leaked through a string layer rather than being decoded. +_HEX_ESCAPE_PATTERN = re.compile(r"\\x[0-9a-fA-F]{2}") + + +# --- Static patterns for incident_concern (A.8.4) --- +# Stdlib-only categorical taxonomy. Mirrors sentry-sdk's incident shape +# (categorical types over stack/status), but for string payloads from +# model output / tool result rather than exception objects. +_INCIDENT_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "safety_refusal": [ + re.compile( + r"(?i)\b(i\s+(?:cannot|can'?t|am\s+unable\s+to|won'?t\s+be\s+able\s+to)" + r"\s+(?:help|assist|provide|answer|do\s+that))\b" + ), + re.compile(r"(?i)\b(i'?m\s+sorry,?\s+but\s+i\s+(?:cannot|can'?t))\b"), + re.compile(r"(?i)\b(against\s+my\s+(?:guidelines|policies|programming))\b"), + ], + "tool_failure": [ + re.compile( + r"\b(5\d{2})\b\s*(?:internal\s+server\s+error|service\s+unavailable)" + ), + re.compile(r"(?i)\b(ERR_[A-Z_]+|connection\s+refused|ECONNREFUSED)\b"), + re.compile(r"(?i)\b(timed?\s*out|timeout)\b"), + ], + "auth_failure": [ + re.compile(r"\b(401|403)\b\s*(?:unauthori[sz]ed|forbidden)"), + re.compile( + r"(?i)\b(authentication\s+failed|invalid\s+(?:token|credentials))\b" + ), + ], + "quota_exceeded": [ + re.compile(r"\b(429)\b"), + re.compile( + r"(?i)\b(rate\s+limit\s+exceeded|quota\s+exceeded|too\s+many\s+requests)\b" + ), + ], + "hallucination": [ + re.compile(r"(?i)\b(i\s+(?:made\s+(?:that|this)\s+up|am\s+just\s+guessing))\b"), + re.compile(r"(?i)\b(i\s+don'?t\s+actually\s+know|i\s+fabricat(?:ed|ing))\b"), + ], 
+} + +# --- Static patterns for commitment_concern (A.10.4) --- +# Commitment-language signals. The verb pattern covers both first-person +# promise verbs ("we will refund") and formal-business commitment markers +# common in proposal / SOW outputs ("Cost: $X", "fixed scope", +# "Deliverables", "Timeline: N days", "I propose"). Verb, amount, and +# deadline signals combine via OR semantics — see +# :meth:`_check_commitment_concern`. +_COMMITMENT_VERB_PATTERN = re.compile( + r"(?i)(" + # First-person promise / liability verbs + r"\brefund\b|\breimburse\b|" + r"\bwarranty\b|\bwarrant(?:y|ed|ies)\b|\bguarante[ed]+\b|" + r"\bsla\b|" + r"\bwaive[d]?\b|" + r"\b(?:we|i)\s+(?:will|shall|promise|commit|guarantee)\b|" + r"\b(?:we|i|i'?ll)\s+(?:deliver|provide|complete|finish|" + r"handover|hand\s+over|ship)\b|" + # Proposal / SOW commitment markers + r"\bfixed\s+(?:price|cost|fee|scope|bid|rate)\b|" + r"\bcost\s*:\s*\$?\d|" + r"\bquote\s*:\s*\$?\d|" + r"\bdeliverables?\b|" + r"\btimeline\s*:\s*\d+\s*(?:second|minute|hour|day|week|month|year)s?\b|" + r"\bI\s+propose\b" + r")" +) +# Currency-anchored amount detection. Requires a currency marker adjacent +# to the number so URL fragments (e.g. ``/667851``) don't false-positive. +# Covers symbol-then-number ($780) and number-then-code (780 USD). +# +# Bare percentages (``75%``, ``99.9%``) are deliberately NOT matched +# here — they fire on benign status / progress text ("75% complete", +# "99.9% uptime") under OR semantics. Real percentage-bearing +# commitments ("we'll give you a 20% discount", "refund 100%") still +# fire via the verb pattern. +_COMMITMENT_AMOUNT_FALLBACK = re.compile( + r"(?:\$|€|£|¥|₹|USD|EUR|GBP|JPY|INR)\s*\d[\d,]*(?:\.\d+)?" 
+ r"|\b\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|JPY|INR|" + r"dollars?|euros?|pounds?|yen|rupees?)\b" +) +_COMMITMENT_DEADLINE_PATTERN = re.compile( + r"(?i)\bwithin\s+\d+\s*(?:second|minute|hour|day|week|month|year)s?\b" + r"|\bby\s+(?:tomorrow|next\s+\w+|\d+/\d+(?:/\d+)?)\b" +) + + +class GovernanceEvaluator: + """Evaluates governance rules against check contexts. + + Supports two enforcement modes: + + - ``AUDIT``: log all violations but never block (DENY collapses to + AUDIT in the final action). + - ``ENFORCE``: actually block on DENY rules — raises + :class:`GovernanceBlockException` and the agent stops. + + All dependencies (mode, audit manager, compensator) are injected + via the constructor. The evaluator does not consult any + process-global state — parallel runtimes (``uipath eval``) get + their own evaluator with their own audit + compensation pipelines. + """ + + def __init__( + self, + policy_index: PolicyIndex, + *, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, + audit_manager: AuditManager | None = None, + compensator: GuardrailCompensator | None = None, + ) -> None: + """Initialize with a compiled policy index and runtime-scoped deps. + + Args: + policy_index: The compiled :class:`PolicyIndex` to evaluate. + Typically read from :attr:`GovernanceRuntime.policy_index` + — the host built it from the provider's + :class:`PolicyResponse` via + :func:`build_policy_index_from_yaml`. + enforcement_mode: Mode the evaluator applies. Defaults to + ``AUDIT`` — the safe default for callers that don't + explicitly opt in to ENFORCE. The wiring layer should + pass ``runtime.enforcement_mode`` here so the evaluator + and the wrapping :class:`GovernanceRuntime` agree on a + single source of truth. + audit_manager: Per-runtime :class:`AuditManager`. When + ``None`` the evaluator runs silently (no audit events + emitted). Tests that don't care about emission can + leave this out. 
+ compensator: Per-runtime :class:`GuardrailCompensator` + used to dispatch ``/runtime/govern`` POSTs for + guardrail-fallback rules. When ``None`` such dispatch + is skipped — the evaluator still records the matched + rules in the :class:`AuditRecord`. + """ + self._policy_index = policy_index + self._enforcement_mode = enforcement_mode + self._audit_manager = audit_manager + self._compensator = compensator + + @property + def policy_index(self) -> PolicyIndex: + """Return the compiled policy index this evaluator runs against.""" + return self._policy_index + + @property + def mode(self) -> EnforcementMode: + """The enforcement mode this evaluator applies.""" + return self._enforcement_mode + + def is_audit_mode(self) -> bool: + """Check if running in audit-only mode.""" + return self._enforcement_mode == EnforcementMode.AUDIT + + def evaluate(self, context: CheckContext) -> AuditRecord: + """Evaluate rules registered for ``context.hook`` against the context. + + Only rules whose ``hook`` field matches the current lifecycle hook + are evaluated — a ``tool_call`` rule does not fire on + ``before_model``, and vice versa. This avoids running checks + against fields the context cannot provide and keeps the audit + stream scoped to the active phase. + + The final action depends on the enforcement mode: + - DISABLED mode: Short-circuit; no rules evaluated, no audit emitted. + - AUDIT mode: Even DENY rules result in AUDIT action (log only, don't block) + - ENFORCE mode: DENY rules result in DENY action AND a + :class:`GovernanceBlockException` is raised. + + Audit events (per-rule + hook summary) are emitted via the + :class:`AuditManager` injected at construction (skipped when + none was supplied). + + Args: + context: The check context with hook and content + + Returns: + AuditRecord with all evaluations and final action. + + Raises: + GovernanceBlockException: In ENFORCE mode when a DENY rule matches. 
+ """ + mode = self._enforcement_mode + if mode == EnforcementMode.DISABLED: + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + hook=context.hook, + evaluations=[], + final_action=Action.ALLOW, + metadata={**context.metadata, "enforcement_mode": mode.value}, + ) + + rules = self._policy_index.get_rules_for_hook(context.hook) + + evaluations: list[RuleEvaluation] = [] + raw_action = Action.ALLOW # The action before mode adjustment + deny_would_fire = False # Track if DENY would have fired + + for rule in rules: + if not rule.enabled: + continue + + evaluation = self._evaluate_rule(rule, context) + evaluations.append(evaluation) + + if evaluation.matched: + # Take the most restrictive action. Use evaluation.action + # (which already folds in per-check overrides), not + # rule.action, so check-level overrides are honored here too. + eval_action = evaluation.action + if eval_action == Action.DENY: + raw_action = Action.DENY + deny_would_fire = True + elif eval_action == Action.ESCALATE and raw_action != Action.DENY: + raw_action = Action.ESCALATE + elif eval_action == Action.AUDIT and raw_action == Action.ALLOW: + raw_action = Action.AUDIT + + # Apply enforcement mode + final_action = self._apply_enforcement_mode(raw_action) + + # Build metadata with mode info + record_metadata = dict(context.metadata) + record_metadata["enforcement_mode"] = mode.value + if deny_would_fire and self.is_audit_mode(): + record_metadata["audit_mode_would_deny"] = True + + audit = AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=context.agent_name, + runtime_id=context.runtime_id, + hook=context.hook, + evaluations=evaluations, + final_action=final_action, + metadata=record_metadata, + ) + + self._emit_audit(audit, mode) + + # For any guardrail mapped to UiPath but currently disabled, hand + # the disabled guardrails to the governance-server's + # /runtime/govern endpoint. 
The SERVER runs the guardrail check + # AND writes the trace (the payload carries traceId / src_timestamp + # / hook / agent so it can correlate) — the agent does NOT emit a + # trace itself, to avoid double-writing. Fire-and-forget on a + # daemon thread so a slow or unreachable endpoint never blocks + # the agent. + self._dispatch_compensation(audit, context) + + if final_action == Action.DENY: + raise GovernanceBlockException.from_audit_record(audit) + + return audit + + def _dispatch_compensation( + self, audit: AuditRecord, context: CheckContext + ) -> None: + """Schedule compensating governance for any matched fallback rules. + + Delegates to the injected :class:`GuardrailCompensator`. The + compensator owns concurrency, queue caps, exception isolation, + and graceful process-exit cancellation — this method just + builds the payload, logs the summary, and submits. + + No-op when no compensator was supplied at construction (e.g. + unit tests that don't care about the dispatch path). + """ + if self._compensator is None: + return + + try: + disabled = disabled_guardrails(audit, self._policy_index) + if not disabled: + return + + # Distinct validator names for the operator-facing log line. + validators = [rule.validator for rule in disabled] + + # Surface the disabled-guardrail fire-up: how many rules + # triggered the compensating call, and which validators + # they map to (e.g. pii_detection / prompt_injection / + # harmful_content). One line per dispatch so an operator + # can see the volume + breakdown at a glance. 
+ logger.info( + "Compensating governance triggered: hook=%s, count=%d, validators=[%s]", + audit.hook.value, + len(disabled), + ", ".join(validators), + ) + + self._compensator.submit( + rules=disabled, + data=_compensation_data_for_hook(context), + hook=audit.hook.value, + src_timestamp=audit.timestamp.isoformat(), + agent_name=audit.agent_name, + runtime_id=audit.runtime_id, + ) + except Exception as exc: # noqa: BLE001 - fail-open + logger.warning( + "Failed to dispatch compensating governance call: %s", exc + ) + + def _emit_audit(self, audit: AuditRecord, mode: EnforcementMode) -> None: + """Emit per-rule and hook-summary events to the injected audit manager. + + No-op when no audit manager was supplied at construction. The + per-runtime :class:`AuditManager` handles sink-level circuit + breaking; emission errors stay there and never break evaluation. + """ + manager = self._audit_manager + if manager is None: + return + + hook_name = audit.hook.name + + # ``guardrail_fallback`` rules are server-traced: the agent POSTs + # to ``/runtime/govern`` (see :meth:`_dispatch_compensation`) and + # the governance-server emits the audit event with the actual + # validator verdict. Emitting a Python-side ``rule_evaluation`` + # event here would produce a duplicate trace carrying no + # verdict, so filter these rules out of every event the Python + # evaluator emits (per-rule AND the hook summary's counts). 
+ emittable = [ + ev for ev in audit.evaluations + if not self._is_guardrail_fallback_rule(ev.rule_id) + ] + + for evaluation in emittable: + manager.emit_rule_evaluation( + policy_id=evaluation.rule_id, + rule_name=evaluation.rule_name, + pack_name=evaluation.pack_name, + hook=hook_name, + matched=evaluation.matched, + action=evaluation.action.value if evaluation.matched else "allow", + enforcement_mode=mode, + detail=evaluation.detail, + agent_name=audit.agent_name, + description=evaluation.description, + ) + + manager.emit_hook_summary( + hook=hook_name, + agent_name=audit.agent_name, + total_rules=len(emittable), + matched_rules=sum(1 for ev in emittable if ev.matched), + final_action=audit.final_action.value, + enforcement_mode=mode, + ) + + def _is_guardrail_fallback_rule(self, rule_id: str) -> bool: + """Return True if the rule is a UiPath-compensating fallback rule. + + Such rules carry a ``guardrail_fallback`` condition; their audit + trace is emitted by the governance-server in response to the + ``/runtime/govern`` POST, so the Python evaluator must not emit + a duplicate trace for them. + """ + rule = self._policy_index.get_rule(rule_id) + if rule is None: + return False + for check in rule.checks: + for cond in check.conditions: + if cond.operator == "guardrail_fallback": + return True + return False + + def _apply_enforcement_mode(self, raw_action: Action) -> Action: + """Apply enforcement mode to the raw action. 
+ + In AUDIT mode: + - DENY becomes AUDIT (log but don't block) + - ESCALATE becomes AUDIT (log but don't escalate) + - AUDIT stays AUDIT + - ALLOW stays ALLOW + + In ENFORCE mode: + - All actions pass through unchanged + """ + if self._enforcement_mode == EnforcementMode.AUDIT: + if raw_action in (Action.DENY, Action.ESCALATE): + return Action.AUDIT + return raw_action + + def evaluate_before_agent( + self, + agent_input: str, + agent_name: str, + runtime_id: str, + model_name: str = "", + **kwargs: Any, + ) -> AuditRecord: + """Evaluate BEFORE_AGENT rules.""" + context = CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name=agent_name, + runtime_id=runtime_id, + agent_input=agent_input, + model_name=model_name, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_agent( + self, + agent_output: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_AGENT rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_AGENT, + agent_name=agent_name, + runtime_id=runtime_id, + agent_output=agent_output, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_before_model( + self, + model_input: str, + agent_name: str, + runtime_id: str, + messages: list[dict[str, Any]] | None = None, + model_name: str = "", + **kwargs: Any, + ) -> AuditRecord: + """Evaluate BEFORE_MODEL rules.""" + context = CheckContext( + hook=LifecycleHook.BEFORE_MODEL, + agent_name=agent_name, + runtime_id=runtime_id, + model_input=model_input, + model_name=model_name, + messages=messages or [], + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_model( + self, + model_output: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_MODEL rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_MODEL, + agent_name=agent_name, + runtime_id=runtime_id, + 
model_output=model_output, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_tool_call( + self, + tool_name: str, + tool_args: dict[str, Any], + agent_name: str, + runtime_id: str, + session_state: dict[str, Any] | None = None, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate TOOL_CALL rules.""" + context = CheckContext( + hook=LifecycleHook.TOOL_CALL, + agent_name=agent_name, + runtime_id=runtime_id, + tool_name=tool_name, + tool_args=tool_args, + session_state=session_state or {}, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def evaluate_after_tool( + self, + tool_name: str, + tool_result: str, + agent_name: str, + runtime_id: str, + **kwargs: Any, + ) -> AuditRecord: + """Evaluate AFTER_TOOL rules.""" + context = CheckContext( + hook=LifecycleHook.AFTER_TOOL, + agent_name=agent_name, + runtime_id=runtime_id, + tool_name=tool_name, + tool_result=tool_result, + metadata=kwargs.get("metadata", {}), + ) + return self.evaluate(context) + + def _evaluate_rule(self, rule: Rule, context: CheckContext) -> RuleEvaluation: + """Evaluate a single rule against the context.""" + if not rule.checks: + # No checks = always matches (for audit-only rules) + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=True, + detail="Rule has no conditions (always matches)", + pack_name=rule.pack_name, + action=rule.action, + description=rule.description, + ) + + check_results: list[dict[str, Any]] = [] + any_check_matched = False + # Resolve the rule's action from the MATCHED checks so per-check + # `action` overrides take effect. ``Check.action`` defaults to the + # rule's action (see _yaml_to_index), so for rules without an + # override this equals ``rule.action`` exactly. Take the most + # restrictive matched action (DENY > ESCALATE > AUDIT > ALLOW), + # mirroring evaluate()'s cross-rule aggregation. 
+ matched_action = Action.ALLOW + + for check in rule.checks: + matched, detail = self._evaluate_check(check, context) + check_results.append( + { + "matched": matched, + "detail": detail, + "action": check.action.value, + } + ) + if matched: + any_check_matched = True + if check.action == Action.DENY: + matched_action = Action.DENY + elif ( + check.action == Action.ESCALATE + and matched_action != Action.DENY + ): + matched_action = Action.ESCALATE + elif ( + check.action == Action.AUDIT + and matched_action == Action.ALLOW + ): + matched_action = Action.AUDIT + + # Surface the FIRST matched check's message; falls back to the + # first check's detail (empty string when none matched) for + # backward compatibility with rules that have a single check. + first_matched_detail = next( + (cr["detail"] for cr in check_results if cr["matched"]), + check_results[0]["detail"] if check_results else "", + ) + + return RuleEvaluation( + rule_id=rule.rule_id, + rule_name=rule.name, + matched=any_check_matched, + detail=first_matched_detail, + pack_name=rule.pack_name, + action=matched_action if any_check_matched else Action.ALLOW, + description=rule.description, + check_results=check_results, + ) + + def _evaluate_check(self, check: Check, context: CheckContext) -> tuple[bool, str]: + """Evaluate a single check against the context.""" + if not check.conditions: + return True, "No conditions (always matches)" + + results = [] + for condition in check.conditions: + matched = self._evaluate_condition(condition, context) + results.append(matched) + + if check.logic == "any": + final_match = any(results) + else: # "all" is default + final_match = all(results) + + detail = check.message if final_match else "" + return final_match, detail + + def _evaluate_condition(self, condition: Condition, context: CheckContext) -> bool: + """Evaluate a single condition against the context.""" + field_value = self._get_field_value(condition.field, context) + result = 
self._apply_operator(condition.operator, field_value, condition.value) + + if condition.negate: + result = not result + + return result + + def _get_field_value(self, field: str, context: CheckContext) -> Any: + """Get a field value from the context.""" + parts = field.split(".") + + # Start with context + value: Any = context + + for part in parts: + if hasattr(value, part): + value = getattr(value, part) + elif isinstance(value, dict) and part in value: + value = value[part] + else: + return None + + return value + + def _apply_operator( + self, operator: str, field_value: Any, check_value: Any + ) -> bool: + """Apply an operator to compare field value against check value.""" + # Handle existence checks before the None check + if operator == "exists": + return field_value is not None + if operator == "not_exists": + return field_value is None + + # guardrail_fallback fires only when the guardrail is mapped to + # UiPath but its policy is disabled. Config travels in + # ``check_value``; the rule's ``field`` is unused (so + # ``field_value`` is ``None`` here, which is expected — we must + # special-case this before the generic ``None`` short-circuit + # below). + if operator == "guardrail_fallback": + cfg = check_value if isinstance(check_value, dict) else {} + return bool(cfg.get("mapped_to_uipath", False)) and not bool( + cfg.get("policy_enabled", True) + ) + + if field_value is None: + return False + + # Numeric operators don't need stringification — short-circuit + # before `str(field_value)` (expensive for dict / large payloads). 
+ if operator in ("gt", "gte", "lt", "lte"): + try: + lhs = float(field_value) + rhs = float(check_value) + except (ValueError, TypeError): + return False + if operator == "gt": + return lhs > rhs + if operator == "gte": + return lhs >= rhs + if operator == "lt": + return lhs < rhs + return lhs <= rhs + + field_str = str(field_value) + + match operator: + case "equals" | "eq": + return field_str == str(check_value) + + case "not_equals" | "ne": + return field_str != str(check_value) + + case "contains": + return str(check_value).lower() in field_str.lower() + + case "not_contains": + return str(check_value).lower() not in field_str.lower() + + case "regex" | "matches": + compiled = _compile_regex(str(check_value)) + if compiled is None: + return False + return bool(compiled.search(field_str)) + + case "in_list": + if isinstance(check_value, list): + return field_str in check_value + return False + + case "not_in_list": + if isinstance(check_value, list): + return field_str not in check_value + return True + + case "vader_concern": + # VADER compound score <= threshold. + # check_value: dict like {"threshold": -0.3} (default -0.3) + return self._check_vader_concern(field_str, check_value) + + case "encoding_concern": + # chardet-backed encoding integrity check (A.7.4). + # check_value: dict with optional `min_confidence` (default 0.5) + # and `max_replacement_ratio` (default 0.05). + return self._check_encoding_concern(field_str, check_value) + + case "entropy_concern": + # Shannon entropy outside expected range (A.7.4). + # check_value: dict with optional `min` (default 1.5) and + # `max` (default 7.5) bits/byte. Stdlib only. + return self._check_entropy_concern(field_str, check_value) + + case "incident_concern": + # Categorical incident detection (A.8.4). + # check_value: dict with optional `categories` list + # (subset of safety_refusal/tool_failure/auth_failure/ + # quota_exceeded/hallucination). Default: all categories. 
+ return self._check_incident_concern(field_str, check_value) + + case "commitment_concern": + # Customer commitment language detection (A.10.4). + # check_value: dict with optional `require_amount` (default + # True) and `require_deadline` (default False). Fires when + # a commitment verb co-occurs with the configured signals. + return self._check_commitment_concern(field_str, check_value) + + case _: + logger.debug("Unknown operator: %s", operator) + return False + + @staticmethod + def _check_vader_concern(text: str, params: Any) -> bool: + """Return True if VADER compound score on `text` is <= threshold. + + Args: + text: Text to analyse. + params: Either a dict with `threshold` key, or a numeric threshold + directly. Default threshold is -0.3 (clearly-negative). + + Returns: + True iff vaderSentiment is available AND compound score <= threshold. + Returns False on empty input or if the library is not installed — + sentiment checks no-op rather than crash. + """ + if not text or not text.strip(): + return False + + analyzer = _get_vader_analyzer() + if analyzer is None: + return False + + if isinstance(params, dict): + threshold = float(params.get("threshold", -0.3)) + else: + try: + threshold = float(params) + except (TypeError, ValueError): + threshold = -0.3 + + try: + compound = float(analyzer.polarity_scores(text)["compound"]) + except Exception as exc: # pragma: no cover - defensive + logger.debug("VADER analysis failed: %s", exc) + return False + + return compound <= threshold + + @staticmethod + def _check_encoding_concern(text: str, params: Any) -> bool: + r"""Return True if `text` shows encoding integrity issues. + + Sums multiple deterministic corruption signals against text length: + - U+FFFD replacement characters (already-decoded lossy text) + - Literal ``�`` escape sequences carried through a JSON + / repr layer rather than being decoded + - Literal ``\xHH`` hex escapes (raw bytes leaked into a string) + - Latin-1-as-UTF-8 mojibake bigrams (e.g. 
``é``, ``’``) + If the corruption ratio exceeds ``max_replacement_ratio`` the + check fires. chardet (when installed) is consulted as a + secondary low-confidence signal. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + min_confidence = float(params.get("min_confidence", 0.5)) + max_replacement_ratio = float(params.get("max_replacement_ratio", 0.05)) + min_corruption_events = int(params.get("min_corruption_events", 2)) + + length = max(len(text), 1) + + replacement_chars = text.count("�") + literal_ufffd_escapes = text.count("\\ufffd") + hex_escapes = len(_HEX_ESCAPE_PATTERN.findall(text)) + mojibake_bigrams = sum(text.count(bigram) for bigram in _MOJIBAKE_BIGRAMS) + + # Absolute count of distinct corruption *events* (one per + # U+FFFD, one per literal escape sequence, one per mojibake + # bigram). Even diluted by a lot of clean text, a few of these + # in production output is a strong signal. + corruption_events = ( + replacement_chars + literal_ufffd_escapes + hex_escapes + mojibake_bigrams + ) + if corruption_events >= min_corruption_events: + return True + + # Ratio-based fallback for cases below the absolute floor: still + # catches very short payloads where a single corruption char is + # disproportionate. + # Weight each event by its source-char span so denser corruption + # in shorter text trips the ratio sooner: + # U+FFFD = 1 char, "�" = 6 chars, "\xHH" = 4 chars, + # mojibake bigram = 2 chars. + corruption_chars = ( + replacement_chars + + 6 * literal_ufffd_escapes + + 4 * hex_escapes + + 2 * mojibake_bigrams + ) + if corruption_chars / length > max_replacement_ratio: + return True + + # Secondary: chardet on the encoded bytes. 
For pure str input + # this almost always reports high UTF-8/ASCII confidence (the + # branch is intentionally permissive), but it does catch bytes + # routed through `repr()` or `__str__` of a `bytes` object that + # chardet recognises as a non-UTF8 encoding with low confidence. + chardet = _get_chardet() + if chardet is None: + return False + try: + detection = chardet.detect(text.encode("utf-8", errors="replace")) + confidence = float(detection.get("confidence") or 0.0) + except Exception as exc: # pragma: no cover - defensive + logger.debug("chardet detection failed: %s", exc) + return False + + return confidence < min_confidence + + @staticmethod + def _check_entropy_concern(text: str, params: Any) -> bool: + """Return True if Shannon entropy of `text` is outside an expected range. + + Stdlib-only. Entropy is computed in bits per symbol over byte + frequencies. English prose typically lands ~3.5–4.5 bits/byte; + binary noise approaches 8 bits/byte; constant/repetitive text + approaches 0. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + lo = float(params.get("min", 1.5)) + hi = float(params.get("max", 7.5)) + + data = text.encode("utf-8", errors="replace") + total = len(data) + if total == 0: + return False + + counts = Counter(data) + entropy = 0.0 + for c in counts.values(): + p = c / total + entropy -= p * math.log2(p) + + return entropy < lo or entropy > hi + + @staticmethod + def _check_incident_concern(text: str, params: Any) -> bool: + """Return True if `text` matches any configured incident pattern (A.8.4). + + Categories: safety_refusal, tool_failure, auth_failure, + quota_exceeded, hallucination. Pass ``{"categories": [...]}`` to + restrict; default scans all categories. 
+ """ + if not text or not text.strip(): + return False + + if isinstance(params, dict): + requested = params.get("categories") + else: + requested = None + + if not requested: + categories = list(_INCIDENT_PATTERNS.keys()) + else: + categories = [c for c in requested if c in _INCIDENT_PATTERNS] + + for category in categories: + for pattern in _INCIDENT_PATTERNS[category]: + if pattern.search(text): + return True + return False + + @staticmethod + def _check_commitment_concern(text: str, params: Any) -> bool: + """Return True if `text` carries customer-commitment language (A.10.4). + + OR semantics: a commitment-verb match always fires; when + ``require_amount`` is true, a currency-anchored amount alone also + fires; when ``require_deadline`` is true, a deadline phrase alone + also fires. With both flags false the rule matches on verb only + (verb-only mode). + + The verb pattern covers first-person promise verbs *and* proposal + / SOW commitment markers ("Cost: $X", "fixed scope", + "Deliverables", "Timeline: N days", "I propose"). The amount + pattern requires a currency marker adjacent to the number so URL + fragments don't false-positive. + """ + if not text or not text.strip(): + return False + + if not isinstance(params, dict): + params = {} + require_amount = bool(params.get("require_amount", True)) + require_deadline = bool(params.get("require_deadline", False)) + + verb_match = bool(_COMMITMENT_VERB_PATTERN.search(text)) + + # Verb-only mode: neither supporting signal is enabled. 
+ if not require_amount and not require_deadline: + return verb_match + + amount_match = require_amount and bool( + _COMMITMENT_AMOUNT_FALLBACK.search(text) + ) + deadline_match = require_deadline and bool( + _COMMITMENT_DEADLINE_PATTERN.search(text) + ) + return verb_match or amount_match or deadline_match diff --git a/src/uipath/runtime/governance/native/guardrail_compensation.py b/src/uipath/runtime/governance/native/guardrail_compensation.py new file mode 100644 index 0000000..d346611 --- /dev/null +++ b/src/uipath/runtime/governance/native/guardrail_compensation.py @@ -0,0 +1,311 @@ +"""Compensating governance for disabled centralized guardrails. + +When a ``guardrail_fallback`` rule fires (the guardrail is mapped to +UiPath but the centralized policy is disabled), the framework asks the +governance-server to run the real guardrail check via its +``/{org_id}/agenticgovernance_/api/v1/runtime/govern`` endpoint. + +This module owns only the **local concerns**: a bounded background +pool that schedules the call without blocking the agent hook, and a +trace-id capture that runs on the caller thread before the worker hop +(the worker has no OpenTelemetry context). + +The actual HTTP call — URL composition, auth, headers, JSON +serialisation, env-backed job-context auto-fill — is the +:class:`uipath.core.governance.GovernanceCompensationProvider`'s job. +Callers inject a concrete provider implementation, and this module +just builds the :class:`GovernRequest` wire model and hands it off. + +The call is **fire-and-forget**: the server runs the guardrail AND +writes the audit trace from its side. The agent doesn't inspect the +response — it only cares about whether the call reached the server. + +The compensator is **instance-scoped**: each :class:`GovernanceRuntime` +owns its own pool and semaphore. ``uipath eval`` parallel runtimes +don't share workers, queue slots, or saturation state — one runtime's +spam can't silently drop another's compensation calls. 
+ +The compensator does **not** read host env vars and does not resolve +trace ids itself. It propagates the caller's ``contextvars`` (which +hold the live OTel span) across the worker-thread hop via +:func:`contextvars.copy_context`, so the provider can resolve trace +context at HTTP-call time inside the captured context. +""" + +from __future__ import annotations + +import atexit +import contextvars +import logging +import threading +import weakref +from concurrent.futures import ThreadPoolExecutor +from typing import Any + +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, +) + +logger = logging.getLogger(__name__) + + +# ---------------------------------------------------------------------------- +# Process-wide cleanup machinery +# +# One ``atexit`` hook walks a ``WeakSet`` of live compensators on exit and +# closes each. Bounded atexit registrations (N runtimes → 1 hook, not N) and +# weakref tracking so a disposed compensator can be GC'd. Same pattern as +# :class:`uipath.runtime.governance._audit.base.AuditManager`. 
def _register_compensator_for_cleanup(compensator: GuardrailCompensator) -> None:
    """Track ``compensator`` for exit-time cleanup and install the atexit hook once.

    The compensator is added to the module's weak set of live
    compensators. The process-exit handler is registered the first time
    this function runs; later calls only add to the set.
    """
    global _atexit_registered

    _live_compensators.add(compensator)

    # Cheap unlocked check first; the flag is checked again under the
    # lock so two racing first callers register the handler only once.
    if not _atexit_registered:
        with _atexit_lock:
            if not _atexit_registered:
                atexit.register(_process_cleanup_compensators)
                _atexit_registered = True
class GuardrailCompensator:
    """Instance-scoped compensating-governance dispatcher.

    Owns:

    - A :class:`ThreadPoolExecutor` (default 4 workers) that runs the
      provider's ``compensate`` call off the caller's thread.
    - A :class:`threading.BoundedSemaphore` (default cap = workers × 4)
      that bounds total in-flight submissions (running + queued).
      Submissions arriving while the cap is reached are dropped with a
      warning.

    Every instance registers itself for process-exit cleanup on
    construction; :meth:`close` cancels queued work without waiting.

    Fire-and-forget: :meth:`submit` returns immediately. The actual work
    is delegated to the provider's ``compensate`` method — this class
    only builds the :class:`GovernRequest` and schedules the call.
    """

    _DEFAULT_MAX_WORKERS = 4
    # Queue depth multiplier — total in-flight cap = max_workers × this.
    _INFLIGHT_OVERSUBSCRIPTION = 4

    def __init__(
        self,
        provider: GovernanceCompensationProvider,
        *,
        max_workers: int = _DEFAULT_MAX_WORKERS,
        inflight_oversubscription: int = _INFLIGHT_OVERSUBSCRIPTION,
    ) -> None:
        """Construct a compensator bound to one provider.

        Args:
            provider: Object whose ``compensate(request)`` is invoked on
                a worker thread for every accepted submission.
            max_workers: Concurrent worker threads in the pool.
            inflight_oversubscription: Multiplier applied to
                ``max_workers`` to obtain the in-flight cap; submissions
                beyond ``max_workers * inflight_oversubscription`` are
                dropped.
        """
        self._provider = provider
        self._inflight_cap = max_workers * inflight_oversubscription
        self._pool = ThreadPoolExecutor(
            max_workers=max_workers,
            thread_name_prefix="governance-compensation",
        )
        self._inflight = threading.BoundedSemaphore(self._inflight_cap)
        _register_compensator_for_cleanup(self)

    def submit(
        self,
        rules: list[FiredRule],
        data: dict[str, Any],
        hook: str,
        src_timestamp: str,
        agent_name: str,
        runtime_id: str,
    ) -> None:
        """Schedule a compensation call on the bounded background pool.

        Fire-and-forget: returns immediately and the provider call runs
        on a worker thread. Nothing is scheduled when ``rules`` is empty
        or yields no validator names. When the in-flight cap is reached
        the call is dropped with a warning.

        The caller's :mod:`contextvars` context is snapshotted here and
        the worker runs inside that snapshot via
        :meth:`contextvars.Context.run`, so context-local state visible
        to the caller is also visible to the provider.

        Never raises. If the request cannot be built, or the pool has
        already been shut down, the reserved in-flight slot is released
        and the failure is logged.

        Args:
            rules: Fired-rule metadata; validator names are derived from it.
            data: Payload forwarded in the request.
            hook: Name of the lifecycle hook that triggered the call.
            src_timestamp: Timestamp forwarded in the request.
            agent_name: Name of the agent, forwarded in the request.
            runtime_id: Identifier of the runtime, forwarded in the request.
        """
        if not rules:
            return

        validators = _validators(rules)
        if not validators:
            return

        label = ", ".join(validators)

        if not self._inflight.acquire(blocking=False):
            logger.warning(
                "Compensation pool saturated (>%d in flight); dropping call "
                "(validators=[%s])",
                self._inflight_cap,
                label,
            )
            return

        # From here on a slot is reserved: every exit path must either
        # hand it to the worker (which releases it in ``finally``) or
        # release it explicitly.
        try:
            request = GovernRequest(
                validators=validators,
                rules=rules,
                data=data,
                hook=hook,
                src_timestamp=src_timestamp,
                agent_name=agent_name,
                runtime_id=runtime_id,
            )
        except Exception as exc:  # noqa: BLE001 - fail-open by contract
            # Without this, a rejected payload would leak the slot and
            # propagate to the caller.
            self._inflight.release()
            logger.warning(
                "Compensation request could not be built (validators=[%s]): %s",
                label,
                exc,
            )
            return

        provider = self._provider
        inflight = self._inflight
        # Snapshot the caller's contextvars so the worker runs with the
        # same context-local state the caller had at submit time.
        ctx = contextvars.copy_context()

        def _run() -> None:
            try:
                provider.compensate(request)
            except Exception as exc:  # noqa: BLE001 - fail-open by contract
                logger.warning(
                    "Compensation worker failed (validators=[%s]): %s",
                    label,
                    exc,
                )
            finally:
                inflight.release()

        try:
            self._pool.submit(ctx.run, _run)
        except RuntimeError as exc:
            # Pool was shut down — the worker will never run, so release
            # the slot here and log; never raise.
            self._inflight.release()
            logger.warning(
                "Compensation pool unavailable (validators=[%s]): %s",
                label,
                exc,
            )

    def close(self) -> None:
        """Shut the pool down without waiting and cancel queued tasks.

        ``wait=False`` returns immediately so the caller / process
        shutdown isn't held up; ``cancel_futures=True`` drops anything
        not yet running. Tasks already running are left to finish. Any
        error raised by the shutdown is logged at debug level and
        swallowed, so calling this repeatedly is safe.
        """
        try:
            self._pool.shutdown(wait=False, cancel_futures=True)
        except Exception as exc:  # noqa: BLE001 - shutdown must not raise
            logger.debug("Compensator shutdown error: %s", exc)
- -The loader holds **no module-level state**. ``uipath eval`` can spin up -multiple ``GovernanceRuntime`` instances in the same process and each -gets its own loader with its own provider, cache, and selector — no -cross-instance interference. -""" - -from __future__ import annotations - -import logging -import threading -import time -from collections import Counter - -import yaml -from uipath.core.governance import ( - EnforcementMode, - GovernancePolicyProvider, - PolicyContext, -) - -from uipath.runtime.governance.native._yaml_to_index import build_policy_index_from_yaml -from uipath.runtime.governance.native.models import PolicyIndex - -logger = logging.getLogger(__name__) - - -class PolicyLoader: - """Instance-scoped policy loader bound to one provider. - - Owns the policy-index cache, prefetch coordination, and the - conversational selector for a single :class:`GovernanceRuntime` - instance. Multiple loaders coexist in the same process without - clobbering each other. - - Typical lifecycle:: - - loader = PolicyLoader(provider, is_conversational=False) - loader.prefetch() # non-blocking, optional - index = loader.get_policy_index() # cached after first call - - When ``provider`` is ``None``, every load returns an empty - PolicyIndex without invoking anything. - """ - - # Upper bound on how long :meth:`get_policy_index` waits for an - # in-flight prefetch before falling back to an empty PolicyIndex. - # The provider owns its own transport timeouts; this is the runtime's - # ceiling on blocking the first hook fire. - _PROVIDER_WAIT_SECONDS = 10.0 - - def __init__( - self, - provider: GovernancePolicyProvider | None, - *, - is_conversational: bool | None = None, - ) -> None: - """Construct a per-runtime policy loader. - - Args: - provider: Policy source. ``None`` means no policies will be - loaded — the loader yields an empty PolicyIndex. - is_conversational: Whether the hosted agent is - conversational. 
Travels in the :class:`PolicyContext` - so the provider can select the matching policy view. - ``None`` leaves the selector unset — the provider - applies its default. - """ - self._provider = provider - self._is_conversational = is_conversational - self._policy_index: PolicyIndex | None = None - # Enforcement mode supplied by the provider on the most recent - # load. ``None`` until the first load lands (or whenever the - # provider omits a mode); :attr:`enforcement_mode` returns - # ``AUDIT`` in that case. Instance-scoped so parallel runtimes - # (e.g. ``uipath eval``) don't clobber each other. - self._enforcement_mode: EnforcementMode | None = None - # ``_prefetch_event`` is set once the background load finishes - # (success OR failure); callers of ``get_policy_index`` wait on - # it. ``_prefetch_lock`` guards the start-once semantics so - # concurrent ``prefetch`` calls don't kick off duplicate threads. - self._prefetch_event: threading.Event | None = None - self._prefetch_lock = threading.Lock() - - def prefetch(self) -> None: - """Kick off a background load of the policy index. - - Non-blocking. Designed to be called as early as possible (at - :class:`GovernanceRuntime` init) so the policy fetch overlaps - with the rest of agent setup. The result lands in this loader's - cache; :meth:`get_policy_index` waits on the prefetch when it's - in flight. - - Idempotent: subsequent calls while the first is running are - no-ops, and calls after completion are no-ops. No-op when no - provider is supplied — there's nothing to fetch. 
- """ - if self._provider is None: - return - - with self._prefetch_lock: - if self._policy_index is not None: - return # already loaded - if self._prefetch_event is not None: - return # already in flight - event = threading.Event() - self._prefetch_event = event - - def _worker() -> None: - try: - loaded = self.load_policy_index() - except Exception as exc: # noqa: BLE001 - logged; first hook will retry sync - logger.warning("Policy prefetch failed: %s", exc) - else: - with self._prefetch_lock: - # Only publish if we're still the live prefetch. - # ``clear_cache`` nulls ``_prefetch_event`` to retire - # an in-flight worker; in that case the loaded value - # belongs to a stale generation and must be dropped - # rather than clobbering the just-cleared state. - if self._prefetch_event is event: - self._policy_index = loaded - finally: - event.set() - - threading.Thread( - target=_worker, - name="governance-policy-prefetch", - daemon=True, - ).start() - - def get_policy_index(self) -> PolicyIndex: - """Get the cached policy index, loading if necessary. - - Resolution order on first call: - 1. If a prefetch (see :meth:`prefetch`) is in flight, wait - for it to complete (bounded by ``_PROVIDER_WAIT_SECONDS``). - 2. Synchronously call :meth:`load_policy_index` (which invokes - the provider). - 3. Empty PolicyIndex when no provider is supplied or the - provider fails / returns nothing. - - Result is cached for the loader's lifetime; per-hook evaluation - never touches the network. Call :meth:`clear_cache` to force a - refetch (mainly for tests). - """ - if self._policy_index is not None: - return self._policy_index - - event = self._prefetch_event - if event is not None: - completed = event.wait(timeout=self._PROVIDER_WAIT_SECONDS) - if completed and self._policy_index is not None: - return self._policy_index - if not completed: - # Timeout: cache an empty index so we don't re-wait the - # full timeout on every subsequent hook. 
- logger.warning( - "Policy prefetch did not complete in %.1fs; " - "agent will run without any policies", - self._PROVIDER_WAIT_SECONDS, - ) - self._policy_index = PolicyIndex() - return self._policy_index - - # Completed but produced no PolicyIndex — the worker hit an - # unexpected error. Do NOT cache the empty result: caching - # would permanently disable governance for the loader's - # lifetime even though a later prefetch / clear_cache could - # still recover. Return an empty index for this call only. - logger.warning( - "Policy prefetch completed but produced no PolicyIndex " - "(see prior WARN for the root cause); agent will run " - "without any policies for this call" - ) - return PolicyIndex() - - # No prefetch was started (direct callers / tests). Sync load. - self._policy_index = self.load_policy_index() - return self._policy_index - - def load_policy_index(self) -> PolicyIndex: - """Synchronously load and parse the policy index. - - Returns: - PolicyIndex parsed from the provider response. Empty - PolicyIndex when no provider is supplied, the provider - raises, the YAML is malformed, or the response yields - zero rules. 
- """ - start = time.perf_counter() - - index = ( - self._load_from_provider(self._provider) - if self._provider is not None - else None - ) - - if index is not None: - self._log_index_summary(index) - logger.info( - "Policy index ready: source=provider, total_ms=%.1f", - (time.perf_counter() - start) * 1000, - ) - return index - - reason = self._empty_index_reason() - logger.info( - "Policy index ready: source=empty (%s), total_ms=%.1f", - reason, - (time.perf_counter() - start) * 1000, - ) - return PolicyIndex() - - def _empty_index_reason(self) -> str: - """Diagnose why policy loading produced nothing.""" - if self._provider is None: - return "no policy provider supplied" - return "provider returned no policies (error / empty body / zero rules)" - - def _load_from_provider( - self, provider: GovernancePolicyProvider - ) -> PolicyIndex | None: - """Fetch and parse the policy index via the supplied provider. - - Applies the provider-supplied enforcement mode as a side effect. - Returns ``None`` when the provider raises, when the YAML is - malformed, or when the resulting index has no rules — caller - returns an empty PolicyIndex in those cases. - - Takes ``provider`` as a parameter (rather than reading - ``self._provider``) so the type system can prove the call site - is non-None — :meth:`load_policy_index` guards on ``None`` and - passes the narrowed value through. 
- """ - start = time.perf_counter() - - ctx = PolicyContext(is_conversational=self._is_conversational) - - try: - response = provider.get_policy(ctx) - except Exception as exc: # noqa: BLE001 - fail-open by contract - logger.warning("Policy provider get_policy failed: %s", exc) - return None - - if response.mode is not None: - self._enforcement_mode = response.mode - logger.info("Enforcement mode set from provider: %s", response.mode.value) - - if not response.policies: - logger.warning( - "Policy provider returned empty policies field; " - "agent will run without any policies" - ) - return None - - try: - index = build_policy_index_from_yaml(response.policies) - except yaml.YAMLError as exc: - logger.warning("Policy YAML from provider was malformed: %s", exc) - return None - except Exception as exc: # noqa: BLE001 - never let load break agent startup - logger.warning("Failed to build PolicyIndex from provider YAML: %s", exc) - return None - - if index.total_rules == 0: - logger.warning( - "Policy YAML from provider yielded zero rules; " - "agent will run without any policies" - ) - return None - - elapsed_ms = (time.perf_counter() - start) * 1000 - logger.info( - "Loaded policy index from provider: packs=%s, rules=%d, elapsed_ms=%.1f", - index.pack_names, - index.total_rules, - elapsed_ms, - ) - return index - - def _log_index_summary(self, index: PolicyIndex) -> None: - """Log summary of loaded policy index.""" - hook_counts: Counter[str] = Counter() - for rule in index.all_rules: - hook_counts[rule.hook.value] += 1 - - logger.debug( - "Policy packs: %s, total rules: %d, by hook: %s", - index.pack_names, - index.total_rules, - dict(hook_counts), - ) - - @property - def enforcement_mode(self) -> EnforcementMode: - """Active enforcement mode for this loader. - - The canonical source is whatever the policy provider supplied on - the most recent load. 
Until that load lands (or if the provider - omits a mode), the default is :attr:`EnforcementMode.AUDIT` — - evaluate and log without blocking. Defaulting to AUDIT avoids - the chicken-and-egg where a DISABLED default would short-circuit - evaluation before the background load could ever opt the tenant - in. - """ - return ( - self._enforcement_mode - if self._enforcement_mode is not None - else EnforcementMode.AUDIT - ) - - @property - def available_packs(self) -> list[str]: - """Pack names from the currently loaded policy index. - - Returns whatever the provider supplied on the most recent load. - Empty list if no index has been loaded yet. - """ - if self._policy_index is None: - return [] - return self._policy_index.pack_names - - def clear_cache(self) -> None: - """Clear the cached policy index and any in-flight prefetch state. - - Next call to :meth:`get_policy_index` will reload from the - provider. - """ - with self._prefetch_lock: - self._policy_index = None - self._prefetch_event = None - logger.debug("Policy index cache cleared") diff --git a/src/uipath/runtime/governance/runtime.py b/src/uipath/runtime/governance/runtime.py index c8f9dd9..ab3d177 100644 --- a/src/uipath/runtime/governance/runtime.py +++ b/src/uipath/runtime/governance/runtime.py @@ -1,36 +1,45 @@ """Governance runtime wrapper. -Wraps a :class:`UiPathRuntimeProtocol` delegate so policy data is sourced -through a :class:`GovernancePolicyProvider`. The provider owns the wire -/ transport (auth, retries, telemetry); the runtime only consumes the -parsed :class:`PolicyResponse`. There is no direct backend fallback — -when ``policy_provider`` is ``None`` the agent runs without any -governance policies. - -The wiring layer (uipath CLI) decides whether to construct -``GovernanceRuntime`` at all (feature flag, project config, etc.) and -passes ``is_conversational`` explicitly when it knows the agent type. -The runtime layer does not introspect the delegate's private attributes -to discover that. 
- -**Staging caveat — policy loading only, no enforcement yet.** This -module is the policy-loading scaffold: ``__init__`` constructs an -instance-scoped :class:`PolicyLoader` and kicks off a background -prefetch. ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` are -pure passthroughs — no per-hook policy evaluation runs. The evaluator -and framework adapter wiring that consumes the loader's policy index -lands in a follow-up slice. Customers constructing -:class:`GovernanceRuntime` today get policy loading without policy -enforcement; this is intentional and will change when the evaluator -slice merges. +Wraps a :class:`UiPathRuntimeProtocol` delegate and carries a resolved +policy snapshot — a :class:`PolicyIndex` and :class:`EnforcementMode` +supplied by the caller. The wrapper performs no I/O at construction, +holds no background thread, retains no policy provider, and reads no +host environment variables. + +The caller (typically the host CLI) is expected to: + +- ``await provider.get_policy_async(PolicyContext(...))`` itself, +- compile the response YAML via + :func:`uipath.runtime.governance.native.build_policy_index_from_yaml`, +- skip wrapping entirely when the response mode is + :attr:`EnforcementMode.DISABLED`, +- pass the resolved ``PolicyIndex`` and ``EnforcementMode`` into the + constructor. + +The wrapper owns the BEFORE_AGENT / AFTER_AGENT lifecycle boundary +when an evaluator is supplied at construction. Framework adapters +intentionally skip chain-level events so nested chain runs don't fire +duplicate boundary evaluations; the runtime layer is the unambiguous +"one invocation = one boundary" point, so it owns those hooks. Per-step +hooks (BEFORE_MODEL, AFTER_MODEL, TOOL_CALL, AFTER_TOOL) are fired by +adapters that observe per-step events. + +Trace-id is intentionally **not** carried on this wrapper. 
The +governance compensator captures the live OTel context across the +thread-pool hop via :func:`contextvars.copy_context`, and the +injected provider resolves the canonical trace id at HTTP-call time. +The runtime layer is fully env-free for this path. """ from __future__ import annotations +import json import logging from typing import Any, AsyncGenerator -from uipath.core.governance import GovernancePolicyProvider +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.serialization import serialize_object from uipath.runtime.base import ( UiPathExecuteOptions, @@ -38,89 +47,166 @@ UiPathStreamOptions, ) from uipath.runtime.events import UiPathRuntimeEvent -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import PolicyIndex from uipath.runtime.result import UiPathRuntimeResult from uipath.runtime.schema import UiPathRuntimeSchema logger = logging.getLogger(__name__) -class GovernanceRuntime: +def _serialize_payload(payload: Any) -> str: + """Serialize an agent input / output to a string for evaluator checks. + + The native evaluator's BEFORE_AGENT / AFTER_AGENT checks scan a + flat string. ``None`` becomes ``""``, ``str`` passes through (so + regex / sentiment checks don't see JSON quotes around the bare + text), and everything else is normalized via + :func:`uipath.core.serialization.serialize_object` (handles + Pydantic / dataclass / datetime / nested structures) and then + JSON-encoded. + """ + if payload is None: + return "" + if isinstance(payload, str): + return payload + try: + return json.dumps(serialize_object(payload)) + except Exception: # noqa: BLE001 — last-resort string fallback + return str(payload) + + +class UiPathGovernedRuntime: """Governance wrapper over a :class:`UiPathRuntimeProtocol` delegate. 
- Constructs an instance-scoped :class:`PolicyLoader` bound to the - supplied provider and kicks off a non-blocking prefetch so the - policy pack overlaps with the rest of agent setup. When - ``policy_provider`` is ``None``, the loader yields an empty - PolicyIndex and the agent runs without any governance policies for - the lifetime of this instance. - - **Policy loading only — no enforcement yet.** ``execute`` / ``stream`` - / ``get_schema`` / ``dispose`` are passthroughs to the delegate; no - per-hook policy evaluation runs in this slice. The evaluator and - framework adapter wiring that consumes the loader's policy index is - staged separately. + Holds a caller-resolved :class:`PolicyIndex` and + :class:`EnforcementMode` for the lifetime of the instance. + ``execute`` / ``stream`` / ``get_schema`` / ``dispose`` forward to + the delegate. + + When ``evaluator`` is supplied, :meth:`execute` and :meth:`stream` + fire ``BEFORE_AGENT`` before delegating and ``AFTER_AGENT`` after a + successful return. Without an evaluator the wrapper is a pure + pass-through. """ def __init__( self, delegate: UiPathRuntimeProtocol, - policy_provider: GovernancePolicyProvider | None, + policy_index: PolicyIndex, + enforcement_mode: EnforcementMode, *, - is_conversational: bool | None = None, + evaluator: GovernanceEvaluator | None = None, + agent_name: str = "", + runtime_id: str = "", ): - """Initialize the governance runtime. + """Initialize the governance runtime with a resolved policy snapshot. Args: delegate: The wrapped runtime to forward execution to. - policy_provider: Source of the policy pack. ``None`` means - no policies will be loaded — the agent runs without - governance for the lifetime of this instance. - is_conversational: Whether the hosted agent is - conversational. Forwarded into the provider's - :class:`PolicyContext` so it can pick the right policy - view (conversational vs autonomous). 
``None`` (default) - leaves the selector unset — the provider applies its - default. The wiring layer (uipath CLI) is expected to - pass the concrete value when it knows the agent type. + policy_index: Resolved :class:`PolicyIndex` built from the + provider's :class:`PolicyResponse`. Pass an empty + ``PolicyIndex()`` to attach the wrapper without any + rules (useful when the wrapper exists for audit + emission only). + enforcement_mode: Resolved :class:`EnforcementMode` from + the provider's :class:`PolicyResponse`. The caller is + expected to skip wrapping entirely when the response + mode is :attr:`EnforcementMode.DISABLED`; this + constructor does not check. + evaluator: Optional :class:`GovernanceEvaluator` that + drives BEFORE_AGENT / AFTER_AGENT inside + :meth:`execute` / :meth:`stream`. When ``None`` the + wrapper is a pure passthrough — the caller is expected + to fire those evaluations itself. + agent_name: Name of the agent (the runtime's entrypoint). + Passed through to the evaluator's hook methods. + runtime_id: Runtime-instance id (conversation id, job id, + or a synthetic per-run id). Passed through so + per-runtime state routes cleanly. """ self._delegate = delegate - self._loader = PolicyLoader( - policy_provider, - is_conversational=is_conversational, - ) - self._loader.prefetch() - - @property - def loader(self) -> PolicyLoader: - """The instance-scoped policy loader. - - Exposed so adapters / evaluators wired into this runtime can - call :meth:`PolicyLoader.get_policy_index` at hook time. + self._policy_index = policy_index + self._enforcement_mode = enforcement_mode + self._evaluator = evaluator + self._agent_name = agent_name + self._runtime_id = runtime_id + + def _fire_before_agent(self, input: Any) -> None: + """Fire BEFORE_AGENT when an evaluator is wired; otherwise no-op. + + ``GovernanceBlockException`` propagates — that's how + ENFORCE-mode DENY rules halt a run. 
Anything else is logged + and swallowed so a governance bug never breaks the agent. """ - return self._loader + if self._evaluator is None: + return + try: + self._evaluator.evaluate_before_agent( + agent_input=_serialize_payload(input), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 — never break a run on audit failure + logger.warning("BEFORE_AGENT governance evaluation failed: %s", exc) + + def _fire_after_agent(self, result: UiPathRuntimeResult) -> None: + """Fire AFTER_AGENT against ``result.output``. + + Same exception policy as :meth:`_fire_before_agent`. + """ + if self._evaluator is None: + return + try: + self._evaluator.evaluate_after_agent( + agent_output=_serialize_payload(result.output), + agent_name=self._agent_name, + runtime_id=self._runtime_id, + ) + except GovernanceBlockException: + raise + except Exception as exc: # noqa: BLE001 + logger.warning("AFTER_AGENT governance evaluation failed: %s", exc) async def execute( self, input: dict[str, Any] | None = None, options: UiPathExecuteOptions | None = None, ) -> UiPathRuntimeResult: - """Execute the delegate. Policy evaluation hooks are wired separately.""" - return await self._delegate.execute(input, options=options) + """Execute the delegate, firing BEFORE_AGENT / AFTER_AGENT around it. + + AFTER_AGENT fires only on successful return — if the delegate + raises, there's no output to evaluate. + """ + self._fire_before_agent(input) + result = await self._delegate.execute(input, options=options) + self._fire_after_agent(result) + return result async def stream( self, input: dict[str, Any] | None = None, options: UiPathStreamOptions | None = None, ) -> AsyncGenerator[UiPathRuntimeEvent, None]: - """Stream events from the delegate. Hooks are wired separately.""" + """Stream events from the delegate, firing BEFORE_AGENT first. 
+ + AFTER_AGENT fires once a :class:`UiPathRuntimeResult` event is + observed in the stream — that's the runtime's contract for + signalling a completed invocation. Intermediate state events + pass through untouched. + """ + self._fire_before_agent(input) async for event in self._delegate.stream(input, options=options): + if isinstance(event, UiPathRuntimeResult): + self._fire_after_agent(event) yield event async def get_schema(self) -> UiPathRuntimeSchema: - """Passthrough schema for the delegate.""" + """Forward schema lookup to the delegate.""" return await self._delegate.get_schema() async def dispose(self) -> None: - """Dispose the delegate.""" + """Forward disposal to the delegate.""" await self._delegate.dispose() diff --git a/tests/_helpers.py b/tests/_helpers.py deleted file mode 100644 index 2d3d924..0000000 --- a/tests/_helpers.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Shared test-only helpers. - -Keeps test concerns out of the production governance package: shared -stubs live here rather than inside the production modules. - -The enforcement-mode reset helper is gone because the mode is now -instance-scoped on :class:`PolicyLoader` — tests that want a clean -slate just construct a fresh loader instead of touching a global. -""" - -from __future__ import annotations - -import time - -from uipath.core.governance import PolicyContext, PolicyResponse - - -class StubPolicyProvider: - """Minimal in-memory :class:`GovernancePolicyProvider` for tests. - - Records every :class:`PolicyContext` it receives so tests can assert - on the selector that travelled to the provider. Either returns a - pre-canned :class:`PolicyResponse` or raises a pre-canned exception; - the optional ``slow`` knob lets tests exercise the prefetch-wait - path. 
- """ - - def __init__( - self, - response: PolicyResponse | None = None, - raises: Exception | None = None, - slow: float = 0.0, - ): - self.calls: list[PolicyContext] = [] - self._response = response - self._raises = raises - self._slow = slow - - def get_policy(self, context: PolicyContext) -> PolicyResponse: - self.calls.append(context) - if self._slow: - time.sleep(self._slow) - if self._raises is not None: - raise self._raises - assert self._response is not None - return self._response diff --git a/tests/conftest.py b/tests/conftest.py index ba76eca..a6c5cd5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,7 @@ def temp_dir() -> Generator[str, None, None]: yield tmp_dir -# Governance state — provider, conversational selector, policy cache, -# enforcement mode — is owned by each :class:`PolicyLoader` instance, -# so no autouse cross-test reset is needed. Tests that want a clean -# slate just construct a fresh loader. +# Governance state is held inline on the :class:`UiPathGovernedRuntime` +# instance — the host passes a resolved :class:`PolicyIndex` + +# :class:`EnforcementMode` into the constructor, no module-level +# state, no cross-test reset needed. diff --git a/tests/test_commitment_concern.py b/tests/test_commitment_concern.py new file mode 100644 index 0000000..a46149b --- /dev/null +++ b/tests/test_commitment_concern.py @@ -0,0 +1,205 @@ +"""Tests for the commitment_concern check (A.10.4). + +The check now uses OR semantics: a verb match, an amount match, or a +deadline match is each sufficient when its enabling flag is on. With +both flags false the rule matches verb-only. + +The verb pattern also covers proposal / SOW style commitment markers +("Cost: $X", "fixed scope", "Deliverables", "Timeline", "I propose") +so formal-business commitments without first-person verbs still fire. + +Amount detection requires a currency marker adjacent to the number to +prevent URL fragments (forum-post IDs, image dimensions, etc.) 
from +false-positiving. +""" + +from __future__ import annotations + +import pytest + +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator + +# --------------------------------------------------------------------------- +# The proposal-style sample that originally slipped through the rule. +# Contains: "Cost: $780 (fixed for the above scope)", "Deliverables", +# "Timeline: 4 days total", "I propose", a forum URL with a 6-digit ID. +# Triple-quoted so we keep the line breaks the model produced. +# --------------------------------------------------------------------------- +SAMPLE_PROPOSAL = """To address your concerns, I reviewed the official UiPath site you referenced and relevant resources on uipath.com to inform a fast stabilization plan. Notable findings include: a community CI/CD sample for UiPath projects (https://forum.uipath.com/t/announcement-ci-cd-pipeline-sample-implementation-s-for-uipath-projects-alpha/667851). + +Here's how I propose we turn your software around quickly: + +Plan +- Triage (logs + reproduce) +- Quick stabilization + +Deliverables +- Defect triage report + +Timeline: 4 days total +- Day 1: Triage + reproduction + +Cost: $780 (fixed for the above scope) +""" + + +@pytest.mark.parametrize( + "text", + [ + "Cost: $780 (fixed for the above scope)", + "Deliverables: a, b, c", + "Timeline: 4 days total for the whole engagement", + "I propose we turn this around in a week", + "We will refund the difference", + "I'll deliver the report by Friday", + "the warranty covers parts only", + "fixed price of one hundred dollars", + ], +) +def test_verb_match_alone_fires(text: str) -> None: + """Each verb-style commitment marker fires on its own (verb-only mode).""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": False, "require_deadline": False} + ) + is True + ) + + +def test_full_proposal_sample_fires() -> None: + """The originally-missed proposal output now fires.""" + assert ( + 
GovernanceEvaluator._check_commitment_concern( + SAMPLE_PROPOSAL, + {"require_amount": False, "require_deadline": False}, + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "$780", + "We charge USD 1,200 per seat", + "The fee is 500 EUR", + ], +) +def test_amount_alone_fires_when_require_amount_true(text: str) -> None: + """Currency-anchored amount alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is True + ) + + +@pytest.mark.parametrize( + "text", + [ + "Task is 75% complete.", + "We maintain 99.9% uptime.", + "Battery at 50%.", + "Score: 12%.", + ], +) +def test_bare_percentage_does_not_fire(text: str) -> None: + """Status-only percentages must not trigger commitment_concern. + + Regression for the prior ``\\d{1,3}\\s*%`` branch in the amount + regex, which fired on benign status / progress text. Real + percentage-bearing commitments ("we'll give a 20% discount") + still fire via the verb pattern. 
+ """ + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": False} + ) + is False + ) + + +def test_percentage_with_verb_still_fires() -> None: + """A commitment verb co-occurring with a percentage still fires.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "We will refund 100% of the purchase price.", + {"require_amount": True, "require_deadline": False}, + ) + is True + ) + + +def test_amount_alone_does_not_fire_when_require_amount_false() -> None: + """Amount-only text is silent when require_amount=False and no verb.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "The list price is $780.", + {"require_amount": False, "require_deadline": False}, + ) + is False + ) + + +def test_deadline_alone_fires_when_require_deadline_true() -> None: + """Deadline phrase alone fires under OR semantics.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + "Will be done within 5 days.", + {"require_amount": False, "require_deadline": True}, + ) + is True + ) + + +def test_url_fragment_digits_do_not_false_positive() -> None: + """A long URL with embedded digits is not a 'commitment'. + + Catches the prior price-parser misbehaviour where Price.fromstring() + picked up forum-post IDs (e.g. ``667851``) and conflated them with + unrelated currency symbols elsewhere in the text. + """ + text = ( + "See https://forum.example.com/t/topic/667851 for details — " + "no commitment language here." 
+ ) + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +@pytest.mark.parametrize( + "text", + [ + "", + " ", + "Just chatting about the weather today.", + "The product is durable and well-made.", + ], +) +def test_no_signal_does_not_fire(text: str) -> None: + """Text without any commitment signal stays silent regardless of flags.""" + assert ( + GovernanceEvaluator._check_commitment_concern( + text, {"require_amount": True, "require_deadline": True} + ) + is False + ) + + +def test_non_dict_params_treated_as_defaults() -> None: + """``params`` of the wrong type degrades to defaults rather than crashing.""" + assert ( + GovernanceEvaluator._check_commitment_concern("we will refund", None) + is True + ) + assert ( + GovernanceEvaluator._check_commitment_concern( + "no verbs here", "garbage" + ) + is False + ) diff --git a/tests/test_enforcement_mode_default.py b/tests/test_enforcement_mode_default.py deleted file mode 100644 index 78230fd..0000000 --- a/tests/test_enforcement_mode_default.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Tests for the default enforcement-mode resolution on :class:`PolicyLoader`. - -The default is :attr:`EnforcementMode.AUDIT` so the wrapper attaches at -runtime construction and the background policy load can run. If the -provider later returns ``disabled``, the loader records it and -:attr:`enforcement_mode` flips. - -Resolution (per :attr:`PolicyLoader.enforcement_mode`): -1. The provider-supplied value on the most recent load. -2. Default :attr:`EnforcementMode.AUDIT`. -""" - -from __future__ import annotations - -from uipath.core.governance import EnforcementMode, PolicyResponse - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader - - -def test_default_mode_is_audit() -> None: - """No provider-supplied mode yet → AUDIT. 
- - AUDIT is the default so the wrapper attaches and the background - policy fetch can run. The backend can flip the mode to DISABLED - on fetch when the tenant has no policies. - """ - loader = PolicyLoader(None) - assert loader.enforcement_mode is EnforcementMode.AUDIT - - -def test_provider_disabled_wins_over_default() -> None: - """A provider supplying DISABLED overrides the AUDIT default.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.DISABLED - - -def test_provider_enforce_wins_over_default() -> None: - """A provider supplying ENFORCE flips the loader to enforce.""" - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(provider) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_loader_with_none_mode_response_keeps_previous_value() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode. - - The wire response model treats ``None`` as "no opinion" — the loader - must not overwrite a real value with it. Otherwise a transient - provider response could silently demote a tenant's enforcement - posture. - """ - p1 = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - # A second provider response that omits mode should not flip back to AUDIT. 
- loader._provider = StubPolicyProvider( - response=PolicyResponse( - mode=None, - policies="standard: p\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - loader.clear_cache() - loader.load_policy_index() - assert loader.enforcement_mode is EnforcementMode.ENFORCE - - -def test_two_loaders_carry_independent_enforcement_modes() -> None: - """The whole point of the refactor: parallel loaders don't share mode. - - Previously :func:`set_enforcement_mode` wrote a module global, so an - ENFORCE-mode loader and a DISABLED-mode loader running concurrently - in the same process clobbered each other (last writer wins). - Instance-scoped mode means each loader's mode is read-isolated. - """ - p_enforce = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.ENFORCE, - policies="standard: e\nrules: [{id: r1, hook: before_model, " - "checks: [{type: regex, patterns: ['x']}]}]\n", - ) - ) - p_disabled = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.DISABLED, policies="") - ) - - enforce_loader = PolicyLoader(p_enforce) - disabled_loader = PolicyLoader(p_disabled) - - enforce_loader.load_policy_index() - disabled_loader.load_policy_index() - - assert enforce_loader.enforcement_mode is EnforcementMode.ENFORCE - assert disabled_loader.enforcement_mode is EnforcementMode.DISABLED diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py new file mode 100644 index 0000000..2039182 --- /dev/null +++ b/tests/test_evaluator.py @@ -0,0 +1,420 @@ +"""Tests for the audit + enforcement behavior of GovernanceEvaluator. + +The evaluator's three load-bearing responsibilities: + +1. DISABLED enforcement mode short-circuits — no rules evaluated, no + audit events emitted, no exceptions raised. +2. AUDIT mode evaluates rules and emits audit events, but transforms + matched DENY actions into AUDIT so execution continues. +3. 
ENFORCE mode evaluates, emits audit, and raises + :class:`GovernanceBlockException` when a DENY rule matches. + +Plus a fail-safe contract: a misbehaving audit sink must not stop +evaluation from completing or propagate as an exception. The +evaluator is constructed with explicit dependencies (audit manager, +enforcement mode); no process-globals are involved. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from uipath.core.governance import EnforcementMode +from uipath.core.governance.exceptions import GovernanceBlockException +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance._audit.base import ( + AuditEvent, + AuditManager, + AuditSink, + EventType, +) +from uipath.runtime.governance.native.evaluator import GovernanceEvaluator +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Test helpers +# --------------------------------------------------------------------------- + + +class _CapturingSink(AuditSink): + """Audit sink that records every event for assertions.""" + + def __init__(self) -> None: + self.events: list[AuditEvent] = [] + + @property + def name(self) -> str: + return "capturing" + + def emit(self, event: AuditEvent) -> None: + self.events.append(event) + + +def _deny_rule_on_input_contains(needle: str) -> Rule: + """Build a rule that DENIES when agent_input contains ``needle``.""" + return Rule( + rule_id="TEST-01", + name="Test deny on input", + clause="A.1.1", + hook=LifecycleHook.BEFORE_AGENT, + action=Action.DENY, + checks=[ + Check( + conditions=[ + Condition( + operator="contains", + field="agent_input", + value=needle, + ) + ], + action=Action.DENY, + message=f"Input must not contain {needle!r}", + ) + ], + ) + + +def _build_index_with(rule: Rule) -> PolicyIndex: + """Wrap a single rule in a 
one-pack PolicyIndex.""" + idx = PolicyIndex() + idx.add_pack( + PolicyPack( + name="test_pack", + version="1.0", + description="test", + rules=[rule], + ) + ) + return idx + + +def _ctx(agent_input: str) -> CheckContext: + return CheckContext( + hook=LifecycleHook.BEFORE_AGENT, + agent_name="test-agent", + runtime_id="run-1", + agent_input=agent_input, + ) + + +def _build_evaluator( + rule: Rule, + mode: EnforcementMode, + audit_manager: AuditManager | None = None, +) -> GovernanceEvaluator: + """Construct an evaluator with explicit deps — no process-globals involved.""" + return GovernanceEvaluator( + _build_index_with(rule), + enforcement_mode=mode, + audit_manager=audit_manager, + ) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def audit_setup() -> Any: + """Per-test :class:`AuditManager` + capturing sink — no default sinks. + + Returns ``(manager, sink)`` so a test can build evaluators with the + manager and inspect emitted events through the sink. Synchronous + mode keeps assertions deterministic. 
+ """ + manager = AuditManager(async_mode=False, register_default_sinks=False) + sink = _CapturingSink() + manager.register_sink(sink) + yield manager, sink + manager.close() + + +# --------------------------------------------------------------------------- +# DISABLED mode +# --------------------------------------------------------------------------- + + +def test_disabled_mode_short_circuits_with_empty_record(audit_setup: Any) -> None: + """DISABLED returns an empty AuditRecord and emits nothing.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.DISABLED, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("definitely contains secret")) + + assert audit.evaluations == [] + assert audit.final_action == Action.ALLOW + assert audit.metadata["enforcement_mode"] == "disabled" + assert sink.events == [] + + +def test_disabled_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: + """Even when a DENY rule WOULD match, DISABLED never raises.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.DISABLED, + audit_manager=manager, + ) + + # Must not raise. 
+ evaluator.evaluate(_ctx("this is blocked")) + + +# --------------------------------------------------------------------------- +# AUDIT mode +# --------------------------------------------------------------------------- + + +def test_audit_mode_transforms_deny_to_audit(audit_setup: Any) -> None: + """AUDIT mode evaluates rules but never returns a DENY final_action.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("contains secret data")) + + assert len(audit.evaluations) == 1 + assert audit.evaluations[0].matched is True + assert audit.evaluations[0].action == Action.DENY # raw rule action preserved + assert audit.final_action == Action.AUDIT # mode-adjusted + assert audit.metadata["audit_mode_would_deny"] is True + + +def test_audit_mode_does_not_raise_on_deny_match(audit_setup: Any) -> None: + """AUDIT mode never raises GovernanceBlockException, even on a DENY hit.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("this is blocked")) # must not raise + + +def test_audit_mode_emits_per_rule_and_summary_events(audit_setup: Any) -> None: + """One rule_evaluation event per rule + one hook_summary per evaluate().""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("contains secret")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert rule_events[0].hook == "BEFORE_AGENT" + assert rule_events[0].data["policy_id"] == "TEST-01" + assert rule_events[0].data["matched"] is True + assert 
rule_events[0].data["action"] == "deny" + # Mode travels on every event (PR #122 contract). + assert rule_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT + + assert len(summary_events) == 1 + assert summary_events[0].data["matched_rules"] == 1 + assert summary_events[0].data["final_action"] == "audit" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.AUDIT + + +def test_audit_mode_unmatched_rule_logged_as_allow(audit_setup: Any) -> None: + """Unmatched rules still emit a rule_evaluation event with action='allow'.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + evaluator.evaluate(_ctx("benign user query")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + assert len(rule_events) == 1 + assert rule_events[0].data["matched"] is False + assert rule_events[0].data["action"] == "allow" + + +# --------------------------------------------------------------------------- +# ENFORCE mode +# --------------------------------------------------------------------------- + + +def test_enforce_mode_raises_on_deny_match(audit_setup: Any) -> None: + """ENFORCE mode raises GovernanceBlockException when a DENY rule matches.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + with pytest.raises(GovernanceBlockException) as exc_info: + evaluator.evaluate(_ctx("input is blocked")) + + exc = exc_info.value + assert exc.rule_id == "TEST-01" + assert exc.rule_name == "Test deny on input" + assert exc.audit_record is not None + assert exc.audit_record.final_action == Action.DENY + + +def test_enforce_mode_emits_audit_before_raising(audit_setup: Any) -> None: + """The audit trail must be emitted even when the call raises.""" + manager, sink = audit_setup + evaluator = _build_evaluator( + 
_deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + with pytest.raises(GovernanceBlockException): + evaluator.evaluate(_ctx("contains blocked")) + + rule_events = [ + e for e in sink.events if e.event_type == EventType.RULE_EVALUATION + ] + summary_events = [ + e for e in sink.events if e.event_type == EventType.HOOK_END + ] + assert len(rule_events) == 1 + assert summary_events[0].data["final_action"] == "deny" + assert summary_events[0].data["enforcement_mode"] == EnforcementMode.ENFORCE + + +def test_enforce_mode_returns_record_when_no_rule_matches(audit_setup: Any) -> None: + """No DENY hit → no raise; the AuditRecord is returned normally.""" + manager, _ = audit_setup + evaluator = _build_evaluator( + _deny_rule_on_input_contains("blocked"), + EnforcementMode.ENFORCE, + audit_manager=manager, + ) + + audit = evaluator.evaluate(_ctx("benign query")) + + assert audit.final_action == Action.ALLOW + assert audit.evaluations[0].matched is False + + +# --------------------------------------------------------------------------- +# Sink-failure isolation + no-audit-manager case +# --------------------------------------------------------------------------- + + +def test_sink_failure_does_not_propagate_or_block_evaluation( + audit_setup: Any, +) -> None: + """A broken sink must not make evaluate() raise or lose its return value. + + Contract: AuditManager wraps each sink's emit() in try/except with a + per-sink failure counter (circuit-breaker), so a sink exception + never propagates back to the evaluator. 
+ """ + manager, capturing_sink = audit_setup + + class _BrokenSink(AuditSink): + @property + def name(self) -> str: + return "broken" + + def emit(self, event: AuditEvent) -> None: + raise RuntimeError("sink broke") + + manager.register_sink(_BrokenSink()) + + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=manager, + ) + + # Must complete without raising even with a broken sink registered. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + # The non-broken capturing sink still got its events. + assert any( + e.event_type == EventType.RULE_EVALUATION for e in capturing_sink.events + ) + + +def test_no_audit_manager_short_circuits_emission() -> None: + """``audit_manager=None`` is a no-op — evaluation still completes. + + Replaces the previous test that mocked ``get_audit_manager`` to + raise. With explicit injection, the equivalent "no manager + available" state is simply ``audit_manager=None`` at construction. + """ + evaluator = _build_evaluator( + _deny_rule_on_input_contains("secret"), + EnforcementMode.AUDIT, + audit_manager=None, + ) + + # Must complete, return record, and not raise. + audit = evaluator.evaluate(_ctx("contains secret")) + + assert audit.final_action == Action.AUDIT + assert audit.evaluations[0].matched is True + + +# --------------------------------------------------------------------------- +# Protocol conformance smoke test +# --------------------------------------------------------------------------- + + +def test_governance_evaluator_satisfies_evaluator_protocol() -> None: + """GovernanceEvaluator must be usable wherever EvaluatorProtocol is expected. + + Mirrors the pattern from test_detached_bridge_satisfies_debug_protocol — + an explicit assignment to the protocol-typed variable documents the + structural contract. 
+ """ + from uipath.core.adapters import EvaluatorProtocol + + evaluator: EvaluatorProtocol = GovernanceEvaluator(PolicyIndex()) + assert isinstance(evaluator, EvaluatorProtocol) + + +def test_evaluator_protocol_methods_resolvable_on_concrete() -> None: + """Every method the protocol declares must be callable on the concrete impl.""" + from uipath.core.adapters import EvaluatorProtocol + + evaluator: Any = GovernanceEvaluator(PolicyIndex()) + for method_name in ( + "evaluate_before_agent", + "evaluate_after_agent", + "evaluate_before_model", + "evaluate_after_model", + "evaluate_tool_call", + "evaluate_after_tool", + ): + assert callable(getattr(evaluator, method_name)) + # isinstance() performs the structural conformance check at runtime; it + # only works because EvaluatorProtocol is @runtime_checkable. + assert isinstance(evaluator, EvaluatorProtocol) diff --git a/tests/test_evaluator_operators.py b/tests/test_evaluator_operators.py new file mode 100644 index 0000000..32e83c6 --- /dev/null +++ b/tests/test_evaluator_operators.py @@ -0,0 +1,672 @@ +"""Tests for ``GovernanceEvaluator`` operators and field resolution. + +Covers each operator implemented in :meth:`_apply_operator` plus the +``_check_*`` helper functions (vader, encoding, entropy, incident, +commitment) and the ``evaluate_*`` dispatchers. 
+""" + +from __future__ import annotations + +import pytest +from uipath.core.governance import EnforcementMode +from uipath.core.governance.models import Action, LifecycleHook + +from uipath.runtime.governance.native.evaluator import ( + _INCIDENT_PATTERNS, + GovernanceEvaluator, +) +from uipath.runtime.governance.native.models import ( + Check, + CheckContext, + Condition, + PolicyIndex, + PolicyPack, + Rule, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _evaluator() -> GovernanceEvaluator: + """Build a GovernanceEvaluator with an empty PolicyIndex (operators only). + + AUDIT is the default mode; operator tests don't care about + enforcement and we don't need an audit manager for purely + operator-level assertions. + """ + return GovernanceEvaluator(policy_index=PolicyIndex()) + + +def _ctx(**fields) -> CheckContext: + """Construct a CheckContext with sensible defaults plus overrides.""" + defaults = dict( + hook=LifecycleHook.AFTER_MODEL, + agent_name="agent", + runtime_id="rt-1", + ) + defaults.update(fields) + return CheckContext(**defaults) + + +def _rule_with_condition(operator: str, field: str, value, *, negate: bool = False) -> Rule: + return Rule( + rule_id="r1", + name="r1", + clause="", + hook=LifecycleHook.AFTER_MODEL, + action=Action.AUDIT, + checks=[ + Check( + conditions=[ + Condition(operator=operator, field=field, value=value, negate=negate) + ], + ) + ], + ) + + +# Mode is per-instance now — tests construct evaluators with the mode +# they need via the ``enforcement_mode`` kwarg. No process-globals to +# reset. 
+ + +# --------------------------------------------------------------------------- +# Field resolution — _get_field_value +# --------------------------------------------------------------------------- + + +def test_get_field_value_top_level_attr() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + assert ev._get_field_value("model_output", ctx) == "hello" + + +def test_get_field_value_dotted_path_into_dict() -> None: + ev = _evaluator() + ctx = _ctx(session_state={"tool_calls": 7}) + assert ev._get_field_value("session_state.tool_calls", ctx) == 7 + + +def test_get_field_value_missing_segment_returns_none() -> None: + ev = _evaluator() + ctx = _ctx() + assert ev._get_field_value("nonexistent", ctx) is None + assert ev._get_field_value("session_state.absent", ctx) is None + + +# --------------------------------------------------------------------------- +# Existence / guardrail_fallback (special-cased before the None check) +# --------------------------------------------------------------------------- + + +def test_exists_true_when_value_present() -> None: + ev = _evaluator() + ctx = _ctx(model_output="x") + assert ev._apply_operator("exists", ev._get_field_value("model_output", ctx), None) is True + + +def test_exists_false_when_missing() -> None: + ev = _evaluator() + assert ev._apply_operator("exists", None, None) is False + + +def test_not_exists_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_exists", None, None) is True + assert ev._apply_operator("not_exists", "x", None) is False + + +def test_guardrail_fallback_mapped_and_disabled_fires() -> None: + ev = _evaluator() + result = ev._apply_operator( + "guardrail_fallback", + None, + {"mapped_to_uipath": True, "policy_enabled": False, "validator": "pii"}, + ) + assert result is True + + +@pytest.mark.parametrize( + "cfg", + [ + {"mapped_to_uipath": False, "policy_enabled": False}, + {"mapped_to_uipath": True, "policy_enabled": True}, + {"mapped_to_uipath": False, 
"policy_enabled": True}, + ], +) +def test_guardrail_fallback_silent_when_not_mapped_or_enabled(cfg: dict) -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, cfg) is False + + +def test_guardrail_fallback_non_dict_value_silent() -> None: + ev = _evaluator() + assert ev._apply_operator("guardrail_fallback", None, "string") is False + + +# --------------------------------------------------------------------------- +# None-field short-circuit (everything except exists / guardrail_fallback) +# --------------------------------------------------------------------------- + + +def test_other_operators_short_circuit_when_field_is_none() -> None: + ev = _evaluator() + for op in ("contains", "regex", "in_list", "gt"): + assert ev._apply_operator(op, None, "anything") is False, op + + +# --------------------------------------------------------------------------- +# Numeric operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "op,lhs,rhs,expected", + [ + ("gt", 5, 3, True), + ("gt", 3, 5, False), + ("gt", 3, 3, False), + ("gte", 3, 3, True), + ("gte", 2, 3, False), + ("lt", 1, 3, True), + ("lt", 3, 3, False), + ("lte", 3, 3, True), + ("lte", 4, 3, False), + ], +) +def test_numeric_operators(op: str, lhs: float, rhs: float, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_numeric_operators_handle_string_coercion() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "5", "3") is True + + +def test_numeric_operators_return_false_on_uncoercible() -> None: + ev = _evaluator() + assert ev._apply_operator("gt", "not-a-number", 3) is False + assert ev._apply_operator("gt", 3, "not-a-number") is False + + +# --------------------------------------------------------------------------- +# String operators +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 
"op,lhs,rhs,expected", + [ + ("equals", "abc", "abc", True), + ("equals", "abc", "ABC", False), # equals is case-sensitive + ("eq", "x", "x", True), + ("not_equals", "abc", "xyz", True), + ("ne", "x", "x", False), + ("contains", "Hello World", "world", True), # case-insensitive + ("contains", "Hello", "xyz", False), + ("not_contains", "Hello", "xyz", True), + ("not_contains", "Hello", "hello", False), + ], +) +def test_string_operators(op: str, lhs: str, rhs: str, expected: bool) -> None: + assert _evaluator()._apply_operator(op, lhs, rhs) is expected + + +def test_regex_matches_pattern() -> None: + ev = _evaluator() + assert ev._apply_operator("regex", "Cost: $1,200", r"\$\d+") is True + + +def test_regex_matches_alias() -> None: + """``matches`` is documented as a synonym for ``regex``.""" + ev = _evaluator() + assert ev._apply_operator("matches", "abc-123", r"\d+") is True + + +def test_regex_invalid_pattern_returns_false() -> None: + """Malformed regex is logged and silently returns False.""" + ev = _evaluator() + assert ev._apply_operator("regex", "anything", "(unclosed") is False + + +# --------------------------------------------------------------------------- +# List operators +# --------------------------------------------------------------------------- + + +def test_in_list_membership() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "delete_file", ["shell", "delete_file"]) is True + assert ev._apply_operator("in_list", "ls", ["shell", "delete_file"]) is False + + +def test_in_list_non_list_value_returns_false() -> None: + ev = _evaluator() + assert ev._apply_operator("in_list", "x", "not a list") is False + + +def test_not_in_list_inverse() -> None: + ev = _evaluator() + assert ev._apply_operator("not_in_list", "ls", ["shell"]) is True + assert ev._apply_operator("not_in_list", "shell", ["shell"]) is False + + +def test_not_in_list_non_list_value_returns_true() -> None: + """``not_in_list`` against a non-list value safely returns True 
+ (nothing is in a non-list).""" + ev = _evaluator() + assert ev._apply_operator("not_in_list", "x", "not a list") is True + + +# --------------------------------------------------------------------------- +# Unknown operator +# --------------------------------------------------------------------------- + + +def test_unknown_operator_returns_false() -> None: + """Unknown operator strings log a debug message and return False.""" + ev = _evaluator() + assert ev._apply_operator("never_heard_of_this", "x", "y") is False + + +# --------------------------------------------------------------------------- +# Negate flag — flips the result +# --------------------------------------------------------------------------- + + +def test_condition_negate_flips_result() -> None: + ev = _evaluator() + ctx = _ctx(model_output="hello") + # contains "hello" → matches; negate inverts to False. + cond = Condition( + operator="contains", field="model_output", value="hello", negate=True, + ) + assert ev._evaluate_condition(cond, ctx) is False + cond2 = Condition( + operator="contains", field="model_output", value="world", negate=True, + ) + assert ev._evaluate_condition(cond2, ctx) is True + + +# --------------------------------------------------------------------------- +# Check-level logic: "all" (AND) vs "any" (OR), and empty-conditions +# --------------------------------------------------------------------------- + + +def test_empty_check_conditions_always_match() -> None: + """A check with no conditions trivially matches — surfaces rule shape bugs.""" + ev = _evaluator() + check = Check(conditions=[], logic="all") + matched, _ = ev._evaluate_check(check, _ctx()) + assert matched is True + + +def test_check_logic_all_requires_every_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="a"), + Condition(operator="contains", field="model_output", value="missing"), + ], + logic="all", + ) + matched, _ = 
ev._evaluate_check(check, _ctx(model_output="a only")) + assert matched is False + + +def test_check_logic_any_requires_one_condition() -> None: + ev = _evaluator() + check = Check( + conditions=[ + Condition(operator="contains", field="model_output", value="present"), + Condition(operator="contains", field="model_output", value="absent"), + ], + logic="any", + ) + matched, detail = ev._evaluate_check(check, _ctx(model_output="present text")) + assert matched is True + # detail is the check's message on match; empty by default in our builder. + assert detail == "" + + +# --------------------------------------------------------------------------- +# VADER sentiment +# --------------------------------------------------------------------------- + + +def test_vader_concern_negative_text_fires() -> None: + """A clearly-negative sentence trips the default threshold of -0.3.""" + assert ( + GovernanceEvaluator._check_vader_concern( + "I absolutely hate this terrible, awful product.", {"threshold": -0.3} + ) + is True + ) + + +def test_vader_concern_positive_text_does_not_fire() -> None: + assert ( + GovernanceEvaluator._check_vader_concern( + "This is wonderful and I love it!", {"threshold": -0.3} + ) + is False + ) + + +def test_vader_concern_empty_text_silent() -> None: + assert GovernanceEvaluator._check_vader_concern("", {}) is False + assert GovernanceEvaluator._check_vader_concern(" ", {}) is False + + +def test_vader_concern_threshold_as_scalar() -> None: + """``params`` may be a bare number; the operator coerces.""" + assert ( + GovernanceEvaluator._check_vader_concern("I hate everything", -0.3) is True + ) + + +def test_vader_concern_invalid_threshold_falls_back() -> None: + """Non-numeric scalar params fall back to the documented default.""" + # "garbage" -> default -0.3 → should still classify clear negative + assert ( + GovernanceEvaluator._check_vader_concern( + "I hate this awful, terrible thing", "garbage" + ) + is True + ) + + +# 
--------------------------------------------------------------------------- +# Encoding integrity +# --------------------------------------------------------------------------- + + +def test_encoding_concern_clean_text_silent() -> None: + assert ( + GovernanceEvaluator._check_encoding_concern( + "Just a normal English sentence with no corruption.", {} + ) + is False + ) + + +def test_encoding_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_encoding_concern("", {}) is False + + +def test_encoding_concern_replacement_chars_fire() -> None: + """U+FFFD replacement chars are a strong corruption signal.""" + text = "Hello � � world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_mojibake_bigrams_fire() -> None: + """Latin-1-as-UTF-8 mojibake patterns are a known corruption shape.""" + text = "é é hello é" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +def test_encoding_concern_hex_escape_literals_fire() -> None: + """Literal ``\\xHH`` sequences mean raw bytes leaked into a string.""" + text = r"Hello \x80 \x81 \x82 world" + assert ( + GovernanceEvaluator._check_encoding_concern( + text, {"min_corruption_events": 2} + ) + is True + ) + + +# --------------------------------------------------------------------------- +# Entropy (stdlib only — deterministic) +# --------------------------------------------------------------------------- + + +def test_entropy_concern_normal_english_does_not_fire() -> None: + """English prose entropy lands ~3.5–4.5 bits/byte — inside default range.""" + text = "The quick brown fox jumps over the lazy dog." 
* 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is False + ) + + +def test_entropy_concern_low_entropy_fires() -> None: + """Highly repetitive text approaches 0 bits/byte.""" + text = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 7.5}) + is True + ) + + +def test_entropy_concern_high_entropy_fires() -> None: + """Random-ish bytes approach 8 bits/byte.""" + # Build text with many distinct chars to push entropy high. + text = "".join(chr(c) for c in range(32, 127)) * 5 + assert ( + GovernanceEvaluator._check_entropy_concern(text, {"min": 1.5, "max": 6.0}) + is True + ) + + +def test_entropy_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_entropy_concern("", {}) is False + + +def test_entropy_concern_non_dict_params_uses_defaults() -> None: + """Non-dict params don't crash; defaults apply.""" + # Normal English prose still won't trip the default min=1.5, max=7.5 range. + text = "The quick brown fox jumps over the lazy dog." 
+ assert ( + GovernanceEvaluator._check_entropy_concern(text, "garbage") is False + ) + + +# --------------------------------------------------------------------------- +# Incident taxonomy (regex-based, deterministic) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "text,expected_category", + [ + ("I cannot help with that.", "safety_refusal"), + ("I'm sorry, but I cannot answer.", "safety_refusal"), + ("500 internal server error", "tool_failure"), + ("Connection refused", "tool_failure"), + ("timed out", "tool_failure"), + ("401 unauthorized", "auth_failure"), + ("authentication failed", "auth_failure"), + ("429", "quota_exceeded"), + ("rate limit exceeded", "quota_exceeded"), + ("I made that up", "hallucination"), + ("I don't actually know", "hallucination"), + ], +) +def test_incident_concern_categorical_matches(text: str, expected_category: str) -> None: + """Each category in ``_INCIDENT_PATTERNS`` has at least one matching exemplar.""" + assert expected_category in _INCIDENT_PATTERNS + assert GovernanceEvaluator._check_incident_concern(text, {}) is True + + +def test_incident_concern_unmatched_silent() -> None: + assert ( + GovernanceEvaluator._check_incident_concern( + "All systems operating normally.", {} + ) + is False + ) + + +def test_incident_concern_empty_silent() -> None: + assert GovernanceEvaluator._check_incident_concern("", {}) is False + + +def test_incident_concern_category_filter() -> None: + """Limit scanning to a subset of categories via ``categories`` param.""" + # "401 unauthorized" hits auth_failure; with only quota_exceeded enabled, + # the scanner should miss it. + assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["quota_exceeded"]} + ) + is False + ) + # With auth_failure enabled, it fires. 
+ assert ( + GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["auth_failure"]} + ) + is True + ) + + +def test_incident_concern_unknown_category_silently_dropped() -> None: + """Categories the system doesn't know about are silently ignored.""" + # Only the unknown category is requested — falls back to no categories, + # so even matching text doesn't fire. + result = GovernanceEvaluator._check_incident_concern( + "401 unauthorized", {"categories": ["unknown_cat_xyz"]} + ) + assert result is False + + +# --------------------------------------------------------------------------- +# evaluate_* dispatchers — verify they build the right CheckContext +# --------------------------------------------------------------------------- + + +def _record_context_evaluator() -> tuple[GovernanceEvaluator, dict]: + """Patch evaluate() to capture the context it receives instead of running rules.""" + captured: dict = {} + ev = _evaluator() + + def _fake_evaluate(ctx): # type: ignore[no-untyped-def] + captured["ctx"] = ctx + from datetime import datetime, timezone + + from uipath.core.governance.models import AuditRecord + + return AuditRecord( + timestamp=datetime.now(timezone.utc), + agent_name=ctx.agent_name, + runtime_id=ctx.runtime_id, + hook=ctx.hook, + evaluations=[], + final_action=Action.ALLOW, + ) + + ev.evaluate = _fake_evaluate # type: ignore[assignment] + return ev, captured + + +def test_evaluate_before_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_agent( + agent_input="user-text", + agent_name="a", + runtime_id="r", + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_AGENT + assert ctx.agent_input == "user-text" + assert ctx.model_name == "gpt-5" + + +def test_evaluate_after_agent_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_agent( + agent_output="reply", agent_name="a", runtime_id="r", + ) + ctx = 
captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_AGENT + assert ctx.agent_output == "reply" + + +def test_evaluate_before_model_carries_messages() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_before_model( + model_input="prompt", + agent_name="a", + runtime_id="r", + messages=[{"role": "user", "content": "hi"}], + model_name="gpt-5", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.BEFORE_MODEL + assert ctx.model_input == "prompt" + assert ctx.messages == [{"role": "user", "content": "hi"}] + + +def test_evaluate_after_model_builds_context() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_model( + model_output="resp", agent_name="a", runtime_id="r", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_MODEL + assert ctx.model_output == "resp" + + +def test_evaluate_tool_call_carries_args() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_tool_call( + tool_name="search", + tool_args={"q": "x"}, + agent_name="a", + runtime_id="r", + session_state={"tool_calls": 1}, + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.TOOL_CALL + assert ctx.tool_name == "search" + assert ctx.tool_args == {"q": "x"} + assert ctx.session_state == {"tool_calls": 1} + + +def test_evaluate_after_tool_carries_result() -> None: + ev, captured = _record_context_evaluator() + ev.evaluate_after_tool( + tool_name="search", + tool_result="some-data", + agent_name="a", + runtime_id="r", + ) + ctx = captured["ctx"] + assert ctx.hook == LifecycleHook.AFTER_TOOL + assert ctx.tool_name == "search" + assert ctx.tool_result == "some-data" + + +# --------------------------------------------------------------------------- +# DISABLED mode — evaluate() short-circuits without emitting audit +# --------------------------------------------------------------------------- + + +def test_disabled_mode_returns_empty_audit_record() -> None: + """DISABLED mode short-circuits the rule loop and audit 
emission.""" + rule = _rule_with_condition("contains", "model_output", "anything") + pack = PolicyPack(name="p", version="1", description="", rules=[rule]) + idx = PolicyIndex() + idx.add_pack(pack) + ev = GovernanceEvaluator( + policy_index=idx, enforcement_mode=EnforcementMode.DISABLED + ) + + audit = ev.evaluate(_ctx(model_output="contains anything")) + assert audit.final_action == Action.ALLOW + assert audit.evaluations == [] diff --git a/tests/test_governance_runtime.py b/tests/test_governance_runtime.py index 810a881..324147b 100644 --- a/tests/test_governance_runtime.py +++ b/tests/test_governance_runtime.py @@ -1,25 +1,23 @@ -"""Tests for the GovernanceRuntime wrapper and the provider loader path. +"""Tests for :class:`UiPathGovernedRuntime` — pure resolved-policy wrapper. -The runtime no longer introspects the delegate's private attributes to -discover the conversational flag — the wiring layer passes it -explicitly. The runtime also no longer reads the governance feature -flag: the wiring layer decides whether to construct -:class:`GovernanceRuntime` at all. +The runtime takes an already-resolved :class:`PolicyIndex` + +:class:`EnforcementMode` at construction (the host fetched the policy +asynchronously via the :class:`GovernancePolicyProvider` and compiled +the YAML). Tests here confirm the wrapper holds the snapshot and +passes execution straight through to the delegate. 
""" from __future__ import annotations from typing import Any -from uipath.core.governance import ( - EnforcementMode, - PolicyResponse, -) +from uipath.core.governance import EnforcementMode -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native.loader import PolicyLoader +from uipath.runtime.governance.native import ( + build_policy_index_from_yaml, +) from uipath.runtime.governance.native.models import PolicyIndex -from uipath.runtime.governance.runtime import GovernanceRuntime +from uipath.runtime.governance.runtime import UiPathGovernedRuntime SIMPLE_POLICY_YAML = """ standard: provider-pack @@ -33,107 +31,28 @@ """ -# Each test constructs a fresh ``PolicyLoader`` / ``GovernanceRuntime`` -# — no module-level state to reset. - - # --------------------------------------------------------------------------- -# PolicyLoader — provider plumbing (mode application, context, errors) +# build_policy_index_from_yaml — host-side compile path # --------------------------------------------------------------------------- -def test_loader_builds_index_and_applies_mode() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, policies=SIMPLE_POLICY_YAML) - ) - - loader = PolicyLoader(provider) - index = loader.load_policy_index() - +def test_build_policy_index_from_yaml_compiles_pack() -> None: + """The host uses this to turn the provider's YAML response into the snapshot.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) assert isinstance(index, PolicyIndex) assert index.total_rules == 1 assert "provider-pack" in index.pack_names - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - -def test_loader_passes_is_conversational_in_context() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider, is_conversational=True).load_policy_index() - - assert len(provider.calls) == 1 - assert 
provider.calls[0].is_conversational is True - - -def test_loader_omits_is_conversational_when_unset() -> None: - """``is_conversational=None`` (the default) leaves the selector unset.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - PolicyLoader(provider).load_policy_index() - - assert len(provider.calls) == 1 - assert provider.calls[0].is_conversational is None - - -def test_loader_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_empty_policies() -> None: - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies="") - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -def test_loader_returns_empty_on_zero_rules() -> None: - empty_pack_yaml = "standard: empty\nrules: []\n" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=empty_pack_yaml) - ) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 -def test_loader_returns_empty_on_malformed_yaml() -> None: - provider = StubPolicyProvider( - response=PolicyResponse( - mode=EnforcementMode.AUDIT, policies="key: : invalid: : yaml" - ) - ) - index = PolicyLoader(provider).load_policy_index() +def test_build_policy_index_from_yaml_empty_yields_empty_index() -> None: + """Empty YAML compiles to an empty PolicyIndex — host can pass straight through.""" + index = build_policy_index_from_yaml("") + assert isinstance(index, PolicyIndex) assert index.total_rules == 0 -def test_loader_does_not_change_mode_when_response_mode_is_none() -> None: - """Provider returning ``mode=None`` doesn't clobber a previously-set mode.""" - p1 = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.ENFORCE, 
policies=SIMPLE_POLICY_YAML) - ) - loader = PolicyLoader(p1) - loader.load_policy_index() - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # Next load via a different provider that returns mode=None must not - # demote the loader's mode back to AUDIT. - loader._provider = StubPolicyProvider( - response=PolicyResponse(mode=None, policies=SIMPLE_POLICY_YAML) - ) - loader.clear_cache() - loader.load_policy_index() - - assert loader.enforcement_mode == EnforcementMode.ENFORCE - - # --------------------------------------------------------------------------- -# GovernanceRuntime — passthroughs + loader wiring +# UiPathGovernedRuntime — passthroughs # --------------------------------------------------------------------------- @@ -163,57 +82,53 @@ async def dispose(self) -> None: self.disposed = True -def test_governance_runtime_exposes_loader_bound_to_provider() -> None: - """The wrapper builds an instance-scoped PolicyLoader carrying the provider.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) +def _make_runtime( + delegate: _StubDelegate | None = None, + *, + policy_index: PolicyIndex | None = None, + enforcement_mode: EnforcementMode = EnforcementMode.AUDIT, +) -> UiPathGovernedRuntime: + """Build a runtime with sensible test defaults.""" + return UiPathGovernedRuntime( + delegate or _StubDelegate(), + policy_index if policy_index is not None else PolicyIndex(), + enforcement_mode, ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - assert isinstance(runtime.loader, PolicyLoader) - assert runtime.loader._provider is provider - - -def test_governance_runtime_forwards_is_conversational_to_loader() -> None: - """The constructor's explicit ``is_conversational`` reaches PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - - runtime = GovernanceRuntime( - _StubDelegate(), 
policy_provider=provider, is_conversational=True - ) - # Force the prefetch to land — load synchronously so we can read calls[0]. - runtime.loader.get_policy_index() +# --------------------------------------------------------------------------- +# Snapshot stored internally — not exposed as a public property +# --------------------------------------------------------------------------- - assert provider.calls, "provider.get_policy was never invoked" - assert provider.calls[0].is_conversational is True +def test_resolved_policy_index_is_held_for_evaluator_use() -> None: + """The wrapper stores the resolved snapshot; the evaluator reads it.""" + index = build_policy_index_from_yaml(SIMPLE_POLICY_YAML) + runtime = _make_runtime(policy_index=index) + # Internal attribute — verify the wrapper kept the exact instance. + assert runtime._policy_index is index -def test_governance_runtime_loader_default_selector_is_none() -> None: - """Omitting ``is_conversational`` leaves the selector unset on PolicyContext.""" - provider = StubPolicyProvider( - response=PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - ) - runtime = GovernanceRuntime(_StubDelegate(), policy_provider=provider) - runtime.loader.get_policy_index() +def test_enforcement_mode_is_held_for_evaluator_use() -> None: + """The wrapper stores the mode supplied at construction.""" + runtime = _make_runtime(enforcement_mode=EnforcementMode.ENFORCE) + assert runtime._enforcement_mode is EnforcementMode.ENFORCE - assert provider.calls[0].is_conversational is None +def test_empty_policy_index_is_a_valid_construction() -> None: + """``PolicyIndex()`` with no packs is acceptable — wrapper attaches without rules.""" + runtime = _make_runtime(policy_index=PolicyIndex()) + assert runtime._policy_index.total_rules == 0 -def test_governance_runtime_with_none_provider_yields_empty_index() -> None: - """No provider → loader yields an empty PolicyIndex, no provider invocation.""" - runtime = 
GovernanceRuntime(_StubDelegate(), policy_provider=None) - index = runtime.loader.get_policy_index() - assert index.total_rules == 0 +# --------------------------------------------------------------------------- +# Passthrough behavior +# --------------------------------------------------------------------------- async def test_governance_runtime_execute_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) result = await runtime.execute({"x": 1}) @@ -223,7 +138,7 @@ async def test_governance_runtime_execute_delegates() -> None: async def test_governance_runtime_stream_delegates() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) events = [e async for e in runtime.stream({"x": 1})] @@ -233,7 +148,7 @@ async def test_governance_runtime_stream_delegates() -> None: async def test_governance_runtime_schema_and_dispose_delegate() -> None: delegate = _StubDelegate() - runtime = GovernanceRuntime(delegate, policy_provider=None) + runtime = _make_runtime(delegate) assert await runtime.get_schema() == "schema" await runtime.dispose() diff --git a/tests/test_guardrail_compensation.py b/tests/test_guardrail_compensation.py new file mode 100644 index 0000000..ef6046a --- /dev/null +++ b/tests/test_guardrail_compensation.py @@ -0,0 +1,503 @@ +"""Tests for the instance-scoped GuardrailCompensator. + +The runtime layer owns only the bounded background pool and the +contextvars propagation that keeps live OTel context visible on the +worker thread. HTTP/auth/URL/header concerns — including ``trace_id`` +resolution — live behind the +:class:`uipath.core.governance.GovernanceCompensationProvider` protocol +and are exercised in the concrete provider's own tests. + +These tests cover: + +- ``disabled_guardrails`` — distilling fired ``guardrail_fallback`` rules + into per-rule wire metadata. 
+- ``GuardrailCompensator.submit`` — pool routing, in-flight + backpressure, shutdown safety, wire-model assembly, and the + ``contextvars.copy_context()`` propagation that keeps the agent's + OTel span visible inside the worker callable. +- Cross-instance isolation — two compensators do not share a pool or + semaphore. +- Process-level cleanup — one ``atexit`` registration, weak refs only. +""" + +from __future__ import annotations + +import gc +import threading +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from uipath.core.governance import ( + FiredRule, + GovernanceCompensationProvider, + GovernRequest, +) + +from uipath.runtime.governance.native import guardrail_compensation +from uipath.runtime.governance.native.guardrail_compensation import ( + GuardrailCompensator, + disabled_guardrails, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _provider() -> MagicMock: + """Mock satisfying the GovernanceCompensationProvider protocol.""" + return MagicMock(spec=GovernanceCompensationProvider) + + +def _rules( + *validators: str, + rule_id: str = "R1", + rule_name: str = "n", + pack: str = "p", +) -> list[FiredRule]: + """Build a list of FiredRule wire models — one per validator.""" + return [ + FiredRule( + rule_id=rule_id, + rule_name=rule_name, + pack_name=pack, + validator=v, + ) + for v in validators + ] + + +def _run_inline(compensator: GuardrailCompensator) -> None: + """Replace the pool's ``submit`` with synchronous execution. + + Lets tests assert provider behavior deterministically without + relying on wait()/sleep(). + """ + + def _sync_submit(fn: Any, *args: Any, **kwargs: Any) -> None: + # The compensator submits ``ctx.run, _run`` (the bound method + # of a captured context plus the callable). 
Mirror that here so + # the captured context still wraps the worker callable. + if args: + fn(*args, **kwargs) + else: + fn() + + compensator._pool.submit = _sync_submit # type: ignore[method-assign] + + +@pytest.fixture(autouse=True) +def _close_dangling_compensators() -> Any: + """Best-effort teardown: close any compensator weak-refs still in the set. + + Each test should call ``compensator.close()``, but a failing + assertion mid-test could leak. The sweep prevents pytest from + hanging at exit on a leftover worker pool. + """ + yield + for compensator in list(guardrail_compensation._live_compensators): + try: + compensator.close() + except Exception: # noqa: BLE001 - best-effort teardown + pass + guardrail_compensation._live_compensators.clear() + + +# --------------------------------------------------------------------------- +# disabled_guardrails +# --------------------------------------------------------------------------- + + +def test_disabled_guardrails_returns_fired_rule_for_matched_disabled_guardrail() -> None: + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[ + SimpleNamespace(matched=True, rule_id="R1", rule_name="PII guardrail") + ] + ) + policy_index = SimpleNamespace( + get_rule=lambda rid: rule if rid == "R1" else None + ) + + out = disabled_guardrails(audit, policy_index) + + assert len(out) == 1 + fr = out[0] + assert isinstance(fr, FiredRule) + assert fr.rule_id == "R1" + assert fr.rule_name == "PII guardrail" + assert fr.pack_name == "" + assert fr.validator == "pii_detection" + + +def test_disabled_guardrails_skips_unmatched_evaluations() -> None: + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=False, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda 
rid: None) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_non_guardrail_conditions() -> None: + cond = SimpleNamespace(operator="regex", value="some-pattern") + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])]) + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_enabled_guardrails() -> None: + """Mapped to UiPath AND enabled → no compensation needed.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": True, + "policy_enabled": True, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +def test_disabled_guardrails_skips_unmapped_guardrails() -> None: + """Not mapped to UiPath → server can't fall back; skip.""" + cond = SimpleNamespace( + operator="guardrail_fallback", + value={ + "validator": "pii_detection", + "mapped_to_uipath": False, + "policy_enabled": False, + }, + ) + rule = SimpleNamespace(checks=[SimpleNamespace(conditions=[cond])], pack_name="") + audit = SimpleNamespace( + evaluations=[SimpleNamespace(matched=True, rule_id="R1", rule_name="x")] + ) + policy_index = SimpleNamespace(get_rule=lambda rid: rule) + assert disabled_guardrails(audit, policy_index) == [] + + +# --------------------------------------------------------------------------- +# GuardrailCompensator.submit — short-circuits + pool routing + backpressure +# --------------------------------------------------------------------------- + + +def test_submit_empty_rules_short_circuits() -> 
None: + """No rules → no pool submit, no provider call.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit([], {}, "before_model", "ts", "a", "r") + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_no_validators_short_circuits() -> None: + """Rules with empty validator strings → no call (nothing to dispatch).""" + provider = _provider() + compensator = GuardrailCompensator(provider) + rules = [FiredRule(rule_id="R", rule_name="n", pack_name="p", validator="")] + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit(rules, {}, "before_model", "ts", "a", "r") + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_routes_through_pool() -> None: + """A non-empty rules list submits a single task to the pool.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "ts", + "agent", + "run", + ) + mock_pool.submit.assert_called_once() + + +def test_submit_drops_when_pool_saturated() -> None: + """When the in-flight semaphore is exhausted, the call is dropped.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + + # Force the semaphore into "exhausted" state. 
+ drained = threading.BoundedSemaphore(1) + drained.acquire() # next acquire(blocking=False) returns False + compensator._inflight = drained + + with patch.object(compensator, "_pool") as mock_pool: + compensator.submit( + _rules("pii_detection"), + {}, + "before_model", + "ts", + "agent", + "run", + ) + + mock_pool.submit.assert_not_called() + provider.compensate.assert_not_called() + + +def test_submit_swallows_pool_shutdown_runtimeerror() -> None: + """If the pool was shut down, submit must not raise.""" + + class _ShutdownPool: + def submit(self, fn: Any, *args: Any, **kwargs: Any) -> None: + raise RuntimeError("cannot schedule new futures after shutdown") + + compensator = GuardrailCompensator(_provider()) + compensator._pool = _ShutdownPool() # type: ignore[assignment] + compensator._inflight = threading.BoundedSemaphore(4) + + # Must not raise. + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + +# --------------------------------------------------------------------------- +# GuardrailCompensator.submit — wire-model assembly + provider invocation +# --------------------------------------------------------------------------- + + +def test_submit_invokes_provider_with_govern_request() -> None: + """The provider receives a GovernRequest carrying every wire field. + + ``trace_id`` is left empty on the wire — the injected provider + resolves it at HTTP-call time. 
+ """ + provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + rules = _rules("pii_detection", "harmful_content") + + compensator.submit( + rules, + {"content": "x"}, + "before_model", + "2026-06-06T00:00:00Z", + "langchain", + "patch-langchain", + ) + + provider.compensate.assert_called_once() + (request,) = provider.compensate.call_args.args + assert isinstance(request, GovernRequest) + # distinct validators drive the guardrail API call + assert request.validators == ["pii_detection", "harmful_content"] + assert request.rules == rules + assert request.data == {"content": "x"} + assert request.hook == "before_model" + # ``trace_id`` is intentionally empty — the provider resolves at HTTP time. + assert request.trace_id == "" + assert request.src_timestamp == "2026-06-06T00:00:00Z" + assert request.agent_name == "langchain" + assert request.runtime_id == "patch-langchain" + # Job-context fields are left for the provider to auto-fill from env. + assert request.folder_key is None + assert request.job_key is None + assert request.process_key is None + assert request.reference_id is None + assert request.agent_version is None + + +def test_submit_dedupes_validators() -> None: + """Multiple rules with the same validator collapse on the wire.""" + provider = _provider() + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + rules = _rules("pii_detection") + _rules("pii_detection", rule_id="R2") + + compensator.submit(rules, {}, "before_model", "ts", "a", "r") + + (request,) = provider.compensate.call_args.args + assert request.validators == ["pii_detection"] + # Per-rule metadata is preserved (one record per rule even with shared validator). 
+ assert len(request.rules) == 2 + + +def test_submit_swallows_provider_errors() -> None: + """A provider exception must never propagate to the caller / agent.""" + provider = _provider() + provider.compensate.side_effect = RuntimeError("network down") + compensator = GuardrailCompensator(provider) + _run_inline(compensator) + + # Must not raise. + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + provider.compensate.assert_called_once() + + +def test_submit_releases_semaphore_on_provider_error() -> None: + """Provider failure must not leak a semaphore slot.""" + provider = _provider() + provider.compensate.side_effect = RuntimeError("transient") + # 4 workers × 1 oversubscription = 4 slots total. + compensator = GuardrailCompensator(provider, inflight_oversubscription=1) + _run_inline(compensator) + + # Fire 8 — all 8 must reach the provider; the semaphore must release + # on each error so the next submit can acquire. + for _ in range(8): + compensator.submit(_rules("x"), {}, "before_model", "ts", "a", "r") + + assert provider.compensate.call_count == 8, ( + "All 8 submissions should fire — semaphore must release on error" + ) + + +# --------------------------------------------------------------------------- +# contextvars propagation — live OTel context visible inside the worker +# --------------------------------------------------------------------------- + + +def test_submit_propagates_otel_context_to_worker_thread() -> None: + """The worker callable runs inside the caller's contextvars snapshot. + + Without ``contextvars.copy_context()``, a worker thread started by + ``ThreadPoolExecutor`` would see an empty OTel context — the + provider could only resolve env-based trace ids on the worker. + With the snapshot, the worker sees the same live span the agent + hook saw, so the provider can resolve the agent's actual trace id. 
+ """ + from opentelemetry import trace + from opentelemetry.sdk.trace import TracerProvider + + tracer = TracerProvider().get_tracer("test") + provider = _provider() + compensator = GuardrailCompensator(provider) + + done = threading.Event() + captured: dict[str, Any] = {} + + def _capture(request: GovernRequest) -> None: + # Runs on the worker thread but inside the captured context — + # the agent's live span should still be visible here. + ctx = trace.get_current_span().get_span_context() + captured["worker_trace_id_hex"] = ( + format(ctx.trace_id, "032x") if ctx.is_valid else "" + ) + captured["worker_thread_name"] = threading.current_thread().name + done.set() + + provider.compensate.side_effect = _capture + + with tracer.start_as_current_span("agent-run") as span: + expected = format(span.get_span_context().trace_id, "032x") + compensator.submit( + _rules("pii_detection"), + {"content": "x"}, + "before_model", + "2026-06-06T00:00:00Z", + "agent", + "rt", + ) + assert done.wait(timeout=2.0), "compensation worker never ran" + + # Worker ran on the dedicated pool thread (not the caller). + assert captured["worker_thread_name"].startswith("governance-compensation") + # And the captured contextvars context propagated the OTel span across + # the thread hop — the worker sees the same trace_id the agent saw. 
+ assert captured["worker_trace_id_hex"] == expected + + +# --------------------------------------------------------------------------- +# Cross-instance isolation — the architectural motivation for the refactor +# --------------------------------------------------------------------------- + + +def test_two_compensators_do_not_share_pool_or_semaphore() -> None: + """Parallel runtimes cannot saturate each other's compensation pool.""" + p1 = _provider() + p2 = _provider() + c1 = GuardrailCompensator(p1) + c2 = GuardrailCompensator(p2) + + assert c1._pool is not c2._pool + assert c1._inflight is not c2._inflight + + # Drain c1's semaphore to its cap; c2 must remain unaffected. + drained = threading.BoundedSemaphore(1) + drained.acquire() + c1._inflight = drained + + _run_inline(c2) + c2.submit(_rules("pii_detection"), {}, "before_model", "ts", "a", "r") + p2.compensate.assert_called_once() + p1.compensate.assert_not_called() + + +# --------------------------------------------------------------------------- +# Lifecycle — bounded atexit + weakref tracking (mirrors AuditManager pattern) +# --------------------------------------------------------------------------- + + +def test_three_compensators_register_one_process_atexit_hook() -> None: + """N compensators → 1 atexit registration, not N. + + Regression: a per-instance ``atexit.register(self.close)`` would + grow the atexit list linearly. The fix routes everyone through one + process-level cleanup hook keyed by a WeakSet. 
+ """ + with patch.object(guardrail_compensation.atexit, "register") as mock_register: + guardrail_compensation._atexit_registered = False + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + GuardrailCompensator(_provider()) + assert mock_register.call_count == 1, ( + "Each compensator must NOT register its own atexit handler" + ) + + +def test_disposed_compensator_can_be_garbage_collected() -> None: + """The WeakSet must NOT keep a disposed compensator alive.""" + import weakref + + compensator = GuardrailCompensator(_provider()) + ref = weakref.ref(compensator) + + assert compensator in guardrail_compensation._live_compensators + + compensator.close() + del compensator + gc.collect() + + assert ref() is None, ( + "GuardrailCompensator kept alive — strong reference leak in cleanup machinery" + ) + + +def test_process_cleanup_handles_already_closed_compensator() -> None: + """If a compensator was explicitly closed, the process hook is a no-op for it.""" + c = GuardrailCompensator(_provider()) + c.close() + # Must not raise. + guardrail_compensation._process_cleanup_compensators() + + +def test_close_is_idempotent() -> None: + """Calling close() twice is a logged no-op, not a crash.""" + c = GuardrailCompensator(_provider()) + c.close() + c.close() # must not raise diff --git a/tests/test_loader.py b/tests/test_loader.py deleted file mode 100644 index 87e453b..0000000 --- a/tests/test_loader.py +++ /dev/null @@ -1,307 +0,0 @@ -"""Tests for the policy loader. - -Provider-only world: each :class:`PolicyLoader` is instance-scoped and -bound to one :class:`GovernancePolicyProvider`. Tests here cover the -caching, prefetch coordination, and fallback-to-empty behavior -independent of any specific provider. End-to-end provider plumbing -(mode application, YAML parsing, runtime wrapper integration) lives in -:mod:`tests.test_governance_runtime`. 
- -The loader no longer reads the governance feature flag — deciding -whether governance attaches at all is the wiring layer's concern, not -the loader's. -""" - -from __future__ import annotations - -import threading -import time -from typing import Any -from unittest.mock import patch - -from uipath.core.governance import ( - EnforcementMode, - PolicyContext, - PolicyResponse, -) - -from tests._helpers import StubPolicyProvider -from uipath.runtime.governance.native import loader as loader_mod -from uipath.runtime.governance.native.loader import PolicyLoader -from uipath.runtime.governance.native.models import PolicyIndex - -SIMPLE_POLICY_YAML = """ -standard: test-pack -version: "1.0" -rules: - - id: r1 - hook: before_model - checks: - - type: regex - patterns: ["leak"] -""" - - -def _ok_response() -> PolicyResponse: - return PolicyResponse(mode=EnforcementMode.AUDIT, policies=SIMPLE_POLICY_YAML) - - -# Each test constructs a fresh ``PolicyLoader`` — no shared state to reset. - - -# --------------------------------------------------------------------------- -# _empty_index_reason — diagnostic string for the "no policies" log -# --------------------------------------------------------------------------- - - -def test_empty_index_reason_no_provider() -> None: - msg = PolicyLoader(None)._empty_index_reason() - assert "no policy provider" in msg - - -def test_empty_index_reason_with_provider() -> None: - msg = PolicyLoader(StubPolicyProvider(response=_ok_response()))._empty_index_reason() - assert "provider returned no policies" in msg - - -# --------------------------------------------------------------------------- -# load_policy_index — synchronous entry point -# --------------------------------------------------------------------------- - - -def test_load_policy_index_empty_when_no_provider() -> None: - """No provider supplied → empty PolicyIndex.""" - index = PolicyLoader(None).load_policy_index() - assert isinstance(index, PolicyIndex) - assert 
index.total_rules == 0 - - -def test_load_policy_index_uses_provider() -> None: - provider = StubPolicyProvider(response=_ok_response()) - - index = PolicyLoader(provider).load_policy_index() - - assert isinstance(index, PolicyIndex) - assert "test-pack" in index.pack_names - assert len(provider.calls) == 1 - - -def test_load_policy_index_returns_empty_when_provider_raises() -> None: - provider = StubPolicyProvider(raises=RuntimeError("boom")) - index = PolicyLoader(provider).load_policy_index() - assert index.total_rules == 0 - - -# --------------------------------------------------------------------------- -# get_policy_index — caching -# --------------------------------------------------------------------------- - - -def test_get_policy_index_caches_after_first_call() -> None: - """A second call returns the cached index without re-invoking the provider.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - a = loader.get_policy_index() - b = loader.get_policy_index() - - assert a is b - assert len(provider.calls) == 1 - - -def test_get_policy_index_sync_load_when_no_prefetch() -> None: - """Without a prefetch in flight, get_policy_index synchronously loads.""" - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_empty_with_no_provider() -> None: - """No provider supplied → cached empty index, provider never invoked.""" - loader = PolicyLoader(None) - a = loader.get_policy_index() - b = loader.get_policy_index() - assert a is b - assert a.total_rules == 0 - - -# --------------------------------------------------------------------------- -# Prefetch — idempotency + completion + timeout -# --------------------------------------------------------------------------- - - -def test_prefetch_no_op_when_provider_is_none() -> None: - """No provider → prefetch is a no-op (no thread, no event).""" - loader = 
PolicyLoader(None) - loader.prefetch() - assert loader._prefetch_event is None - - -def test_prefetch_is_idempotent() -> None: - """Second call while first is in flight is a no-op (no second thread).""" - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - first_event = loader._prefetch_event - loader.prefetch() - assert loader._prefetch_event is first_event - block.set() - if first_event is not None: - first_event.wait(timeout=2.0) - - -def test_prefetch_no_op_when_index_already_loaded() -> None: - """If the index is already cached, prefetch is a no-op.""" - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - loader.get_policy_index() # populate the cache - - loader.prefetch() - - assert len(provider.calls) == 1 - - -def test_get_policy_index_waits_for_prefetch_then_returns() -> None: - """When a prefetch is in flight, get_policy_index waits for completion.""" - started = threading.Event() - release = threading.Event() - - def _fetch(context: PolicyContext) -> PolicyResponse: - started.set() - release.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_fetch)})() - loader = PolicyLoader(provider) - - loader.prefetch() - assert started.wait(timeout=2.0) - threading.Thread( - target=lambda: (time.sleep(0.05), release.set()), daemon=True - ).start() - index = loader.get_policy_index() - assert index.total_rules == 1 - - -def test_get_policy_index_logs_when_prefetch_completes_with_empty_index() -> None: - """The 'completed but produced no PolicyIndex' branch fires on provider failure. 
- - Manually wire a completed event without populating ``_policy_index`` — - simulates a prefetch worker that hit an unexpected error after the - event was claimed but before the index was set. - """ - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - event = threading.Event() - event.set() - loader._prefetch_event = event - - with patch.object(loader_mod.logger, "warning") as mock_warning: - index = loader.get_policy_index() - - assert index.total_rules == 0 - assert any( - "completed but produced no PolicyIndex" in str(call.args[0]) - for call in mock_warning.call_args_list - ) - - -# --------------------------------------------------------------------------- -# available_packs / clear_cache -# --------------------------------------------------------------------------- - - -def test_available_packs_before_load_returns_empty() -> None: - assert PolicyLoader(None).available_packs == [] - - -def test_available_packs_after_load() -> None: - loader = PolicyLoader(StubPolicyProvider(response=_ok_response())) - loader.get_policy_index() - assert "test-pack" in loader.available_packs - - -def test_clear_cache_forces_refetch() -> None: - provider = StubPolicyProvider(response=_ok_response()) - loader = PolicyLoader(provider) - - loader.get_policy_index() - loader.clear_cache() - loader.get_policy_index() - - assert len(provider.calls) == 2 - - -def test_clear_cache_drops_in_flight_worker_result() -> None: - """A worker spawned before ``clear_cache`` must not clobber state after it. - - The race: ``prefetch()`` starts a worker, ``clear_cache()`` retires - the prefetch event, then the worker finishes and (incorrectly, - before the fix) writes its loaded index back over the cleared - cache. With the fix the worker checks ``_prefetch_event is event`` - before publishing and discards its result when orphaned. 
- """ - block = threading.Event() - - def _slow_get(context: PolicyContext) -> PolicyResponse: - block.wait(timeout=2.0) - return _ok_response() - - provider: Any = type("P", (), {"get_policy": staticmethod(_slow_get)})() - loader = PolicyLoader(provider) - - loader.prefetch() - captured_event = loader._prefetch_event - assert captured_event is not None # prefetch actually started - - # Retire the in-flight worker. - loader.clear_cache() - assert loader._policy_index is None - assert loader._prefetch_event is None - - # Release the worker; let it finish and try to publish. - block.set() - assert captured_event.wait(timeout=2.0) - - # The orphan worker's result must NOT land in the cache. - assert loader._policy_index is None - - -# --------------------------------------------------------------------------- -# Cross-instance isolation — the whole point of instance-scoped state -# --------------------------------------------------------------------------- - - -def test_two_loaders_do_not_share_cache() -> None: - """Concurrent loaders maintain independent caches. - - ``uipath eval`` runs multiple runtimes in parallel; each gets its - own loader and must not leak its cached PolicyIndex into the next. 
- """ - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - l1 = PolicyLoader(p1) - l2 = PolicyLoader(p2) - - l1.get_policy_index() - l2.get_policy_index() - - assert len(p1.calls) == 1 - assert len(p2.calls) == 1 - - -def test_two_loaders_carry_independent_conversational_selectors() -> None: - """Each loader threads its own selector into PolicyContext.""" - p1 = StubPolicyProvider(response=_ok_response()) - p2 = StubPolicyProvider(response=_ok_response()) - PolicyLoader(p1, is_conversational=True).load_policy_index() - PolicyLoader(p2, is_conversational=False).load_policy_index() - - assert p1.calls[0].is_conversational is True - assert p2.calls[0].is_conversational is False diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py new file mode 100644 index 0000000..e163932 --- /dev/null +++ b/tests/test_text_extraction.py @@ -0,0 +1,307 @@ +"""Tests for ``_extract_governable_text`` content extraction. + +Replaces the old ``str(value)[:2000]`` path in ``_check_before_agent`` +and ``_check_after_agent``. Pulls clean text out of structured shapes +(dicts, list-of-blocks, pydantic models) instead of letting dict-repr +noise leak into the regex-scanned blob. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +# The wrapper lands in a later slice of the governance stack; skip (don't +# error at collection) when it isn't present yet. 
+_wrapper = pytest.importorskip( + "uipath.runtime.governance.wrapper", + reason="governance wrapper not yet present in this slice", +) +_GOVERNANCE_TEXT_CAP = _wrapper._GOVERNANCE_TEXT_CAP +_extract_governable_text = _wrapper._extract_governable_text + + +def test_plain_string_passes_through() -> None: + assert _extract_governable_text("hello world") == "hello world" + + +def test_none_returns_empty() -> None: + assert _extract_governable_text(None) == "" + + +def test_dict_with_content_key_extracts_content_first() -> None: + """The classic coded-agent output shape — content comes through clean.""" + out = _extract_governable_text( + {"content": "Estimated cost: $780", "_meta": {"id": "abc"}} + ) + assert out.startswith("Estimated cost: $780") + # No dict-syntax noise — the prior str(...) path produced ``{'content': '...'}``. + assert "{'content'" not in out + assert "'_meta'" not in out + + +def test_dict_priority_keys_lead() -> None: + """``content`` / ``text`` / etc. lead before remaining keys.""" + out = _extract_governable_text( + {"trailing_meta": "noise-meta", "content": "primary-text"} + ) + assert out.index("primary-text") < out.index("noise-meta") + + +def test_list_of_text_blocks_concatenates() -> None: + """Anthropic-style content blocks.""" + out = _extract_governable_text( + [ + {"type": "text", "text": "first part"}, + {"type": "image", "source": {"data": "..."}}, + {"type": "text", "text": "second part"}, + ] + ) + assert "first part" in out + assert "second part" in out + + +def test_openai_function_call_shape_extracts_arguments() -> None: + """``arguments`` field on OpenAI-style function-call blocks.""" + out = _extract_governable_text( + [ + { + "type": "function_call", + "name": "end_execution", + "arguments": '{"content":"Cost: $1,200"}', + "id": "fc_abc", + } + ] + ) + assert "Cost: $1,200" in out + + +def test_numeric_scalars_are_skipped() -> None: + """Numbers / booleans aren't governance text — they shouldn't pad the blob.""" + out = 
_extract_governable_text( + {"content": "hello", "count": 42, "ok": True, "rate": 3.14} + ) + assert out == "hello" + + +def test_pydantic_like_model_dump_is_walked() -> None: + """Anything with ``model_dump()`` is walked as its dict form.""" + + class Stub: + def model_dump(self) -> dict: + return {"content": "from pydantic"} + + assert _extract_governable_text(Stub()) == "from pydantic" + + +def test_dataclass_via_dict_method() -> None: + """Objects exposing a ``dict()`` callable also walk via that path.""" + + class Stub: + def dict(self) -> dict: + return {"content": "from dict"} + + assert _extract_governable_text(Stub()) == "from dict" + + +def test_plain_object_attribute_fallback() -> None: + """Public attributes on opaque objects feed the walker.""" + + @dataclass + class Result: + content: str + _private: str = "ignored" + + out = _extract_governable_text(Result(content="visible")) + assert "visible" in out + assert "ignored" not in out + + +def test_cycle_in_structure_does_not_recurse_forever() -> None: + a: dict = {"content": "outer"} + b: dict = {"loop": a} + a["loop"] = b + # Should return without recursing infinitely. 
+ out = _extract_governable_text(a) + assert "outer" in out + + +def test_text_is_capped_at_budget() -> None: + """Long content is truncated so a runaway payload can't dominate scans.""" + big = "x" * (_GOVERNANCE_TEXT_CAP + 1000) + out = _extract_governable_text(big) + assert len(out) == _GOVERNANCE_TEXT_CAP + + +def test_nested_dict_content_extracted() -> None: + """LangGraph-style state with messages nested under a key.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "Cost: $50"}, + ] + } + ) + assert "Cost: $50" in out + + +def test_unknown_block_type_with_no_text_returns_empty() -> None: + """Image-only block with no text payload contributes nothing.""" + out = _extract_governable_text( + [{"type": "image", "source": {"type": "base64", "data": "..."}}] + ) + # Could be empty or contain just the base64 data — but should NOT + # contain Python dict syntax characters that the old path emitted. + assert "{'type'" not in out + + +# --------------------------------------------------------------------------- +# Budget — 64K is the current cap (raised from 8K to fit multi-turn chat). +# --------------------------------------------------------------------------- + + +def test_budget_cap_is_64k() -> None: + """Documents the cap so a future drop won't go unnoticed.""" + assert _GOVERNANCE_TEXT_CAP == 64000 + + +# --------------------------------------------------------------------------- +# Reverse list iteration — latest entry gets the budget first. +# --------------------------------------------------------------------------- + + +def test_lists_are_walked_in_reverse() -> None: + """Latest list entry leads the extracted blob. + + Critical for chat history: the new user message lives at the end of + the messages list and must be visible even when prior turns would + otherwise fill the budget first. 
+ """ + out = _extract_governable_text( + [{"text": "earliest"}, {"text": "middle"}, {"text": "latest"}] + ) + assert out.index("latest") < out.index("middle") < out.index("earliest") + + +def test_long_chat_history_keeps_latest_user_message() -> None: + """A long history must not push the latest message out of the budget. + + Regression for the prior 8K-cap + forward-walk combination, which + silently dropped the latest user message once the conversation + grew past ~7,800 chars of prior content. + """ + bulky_prior = "x" * 2000 + messages = [{"role": "user", "content": bulky_prior} for _ in range(40)] # ~80K + messages.append({"role": "user", "content": "Cost: $1,200 — latest"}) + + out = _extract_governable_text({"messages": messages}) + assert "Cost: $1,200 — latest" in out + + +# --------------------------------------------------------------------------- +# latest_only — BEFORE_AGENT in a conversational agent +# --------------------------------------------------------------------------- + + +def test_latest_only_extracts_just_the_last_list_item() -> None: + """``latest_only=True`` drops every list entry but the last one.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old message"}, + {"role": "assistant", "content": "old response"}, + {"role": "user", "content": "Cost: $1,200"}, + ] + }, + latest_only=True, + ) + assert "Cost: $1,200" in out + assert "old message" not in out + assert "old response" not in out + + +def test_latest_only_resets_inside_chosen_item() -> None: + """Multi-block content inside the latest message is still walked fully. + + ``latest_only`` reduces the OUTER list (chat history) to its last + entry, but multi-block content (text + tool_call + thinking) + inside that latest message must still be extracted in full — + otherwise we'd lose answer text that arrives in a non-final block. 
+ """ + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "old"}, + { + "role": "assistant", + "content": [ + {"type": "text", "text": "part A"}, + { + "type": "function_call", + "arguments": '{"answer":"part B"}', + }, + ], + }, + ] + }, + latest_only=True, + ) + assert "part A" in out + assert "part B" in out + assert "old" not in out + + +def test_latest_only_top_level_list() -> None: + """``latest_only`` applies when the input itself is a list.""" + out = _extract_governable_text( + [ + {"content": "history item 1"}, + {"content": "history item 2"}, + {"content": "latest input"}, + ], + latest_only=True, + ) + assert "latest input" in out + assert "history item 1" not in out + assert "history item 2" not in out + + +def test_latest_only_default_false_still_walks_all() -> None: + """Default behavior unchanged — AFTER_AGENT etc. still see everything.""" + out = _extract_governable_text( + { + "messages": [ + {"role": "user", "content": "first"}, + {"role": "user", "content": "second"}, + ] + } + ) + assert "first" in out + assert "second" in out + + +def test_latest_only_empty_list_is_empty() -> None: + """Empty history → empty extraction.""" + assert _extract_governable_text({"messages": []}, latest_only=True) == "" + + +def test_messages_is_a_priority_content_key() -> None: + """``messages`` (plural) leads ahead of non-priority keys. + + Without ``messages`` in the priority list, an input that also + carries siblings like ``thread_id`` / ``metadata`` could siphon + budget before the actual chat history is walked. 
+ """ + out = _extract_governable_text( + { + "thread_id": "abc-xyz", + "metadata": {"foo": "bar"}, + "messages": [{"role": "user", "content": "primary content"}], + } + ) + assert "primary content" in out + assert out.index("primary content") < ( + out.find("abc-xyz") if "abc-xyz" in out else len(out) + ) diff --git a/uv.lock b/uv.lock index 4f29f88..99d4f87 100644 --- a/uv.lock +++ b/uv.lock @@ -1148,16 +1148,16 @@ wheels = [ [[package]] name = "uipath-core" -version = "0.5.22" -source = { registry = "https://pypi.org/simple" } +version = "0.5.24.dev1017616976" +source = { registry = "https://test.pypi.org/simple/" } dependencies = [ { name = "opentelemetry-instrumentation" }, { name = "opentelemetry-sdk" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e8/e0/1cdf0537ae1db831b066604e0e83132a2dd559371ac6e5d56e96b9039163/uipath_core-0.5.22.tar.gz", hash = "sha256:01ae7c3770369469acf5cef31908e8b878a5b1123f2d930f8537ea2d97d7d621", size = 136212, upload-time = "2026-06-23T16:18:43.081Z" } +sdist = { url = "https://test-files.pythonhosted.org/packages/9f/ab/a6d8edda9d02f8506698245c240c8d1b1b6c1d3398eaedfabd9756405ae3/uipath_core-0.5.24.dev1017616976.tar.gz", hash = "sha256:e0f14e00db1864d8b8ae76a9422b75293387bf7493afb91d99ca1202d9902e17", size = 130551, upload-time = "2026-06-27T10:16:48.961Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/97/2258d51969ec71b1056d67f39d612eac2d7c6e9458d3b3c9a0b10f42e730/uipath_core-0.5.22-py3-none-any.whl", hash = "sha256:60df655b207e02a6d3bfae8c61e1fc9bc0bf11576f7ead07b8b38f23d13fc4d6", size = 58222, upload-time = "2026-06-23T16:18:41.536Z" }, + { url = "https://test-files.pythonhosted.org/packages/4b/9e/4c0deb7d4c216be3612b18103362eeeebd2e5691e7be9ef12bce22aa4aa5/uipath_core-0.5.24.dev1017616976-py3-none-any.whl", hash = "sha256:a5755b7b6ab19298220104c9873c664ab8c9c2c2ef6afdead7c67189171899f0", size = 55002, upload-time = "2026-06-27T10:16:47.823Z" }, ] [[package]] @@ -1191,7 
+1191,7 @@ dev = [ requires-dist = [ { name = "chardet", specifier = ">=5.2.0,<8.0" }, { name = "pyyaml", specifier = ">=6.0,<7.0" }, - { name = "uipath-core", specifier = ">=0.5.22,<0.6.0" }, + { name = "uipath-core", specifier = ">=0.5.24.dev1017610000,<0.5.24.dev1017620000", index = "https://test.pypi.org/simple/" }, { name = "vadersentiment", specifier = ">=3.3.2,<4.0" }, ] From d878dd0d4ce622c067ea7f90711aebfb7296898e Mon Sep 17 00:00:00 2001 From: Viswanath Lekshmanan Date: Sun, 28 Jun 2026 08:20:14 +0530 Subject: [PATCH 2/2] chore(deps): pin uipath-core + uipath-platform to PR #1761 testpypi dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Local-only pin (uv-specific tables; not in [project.dependencies], so the published wheel's Requires-Dist is unaffected). Aligns the local resolver with the dev builds from uipath-python PR #1761 (``refactor(core): drop AdapterRegistry + BaseAdapter; keep EvaluatorProtocol``) — those carry the AuditRecord.trace_id field drop the runtime now relies on. - ``override-dependencies`` pins ``uipath-core==0.5.24.dev1017616976`` and ``uipath-platform==0.1.79.dev1017616976`` (the ``uipath`` sub- package block in PR #1761). - ``[tool.uv.sources]`` adds ``uipath-platform = { index = "testpypi" }`` so the platform pin is resolvable; the existing entry for ``uipath-core`` stays put. - ``[tool.uv.exclude-newer-package]`` adds ``uipath-platform = false`` so the 2-day age guard doesn't filter out the dev build (mirrors the existing toggle for ``uipath-core``). The pin will be reverted once PR #1761 lands and a stable ``uipath-core`` is published. Verified: ``uv pip show uipath-core`` → ``0.5.24.dev1017616976``; 346 tests + 1 skipped pass, ruff + mypy clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 6 ++++++ uv.lock | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d8792f..35493d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,6 +122,12 @@ exclude_lines = [ [tool.uv] exclude-newer = "2 days" +# Pin to the testpypi dev builds from uipath-python PR #1761 +# (refactor: drop AdapterRegistry + BaseAdapter; keep EvaluatorProtocol). +# Local-only — does not affect the published wheel's Requires-Dist. +override-dependencies = [ + "uipath-core==0.5.24.dev1017616976" +] [tool.uv.exclude-newer-package] uipath-core = false diff --git a/uv.lock b/uv.lock index 99d4f87..3938598 100644 --- a/uv.lock +++ b/uv.lock @@ -9,6 +9,9 @@ exclude-newer-span = "P2D" [options.exclude-newer-package] uipath-core = false +[manifest] +overrides = [{ name = "uipath-core", specifier = "==0.5.24.dev1017616976", index = "https://test.pypi.org/simple/" }] + [[package]] name = "annotated-types" version = "0.7.0" @@ -1191,7 +1194,7 @@ dev = [ requires-dist = [ { name = "chardet", specifier = ">=5.2.0,<8.0" }, { name = "pyyaml", specifier = ">=6.0,<7.0" }, - { name = "uipath-core", specifier = ">=0.5.24.dev1017610000,<0.5.24.dev1017620000", index = "https://test.pypi.org/simple/" }, + { name = "uipath-core", specifier = ">=0.5.22,<0.6.0", index = "https://test.pypi.org/simple/" }, { name = "vadersentiment", specifier = ">=3.3.2,<4.0" }, ]