diff --git a/Ix/Cli/CompileCmd.lean b/Ix/Cli/CompileCmd.lean index a00f95e5..fbed8bfa 100644 --- a/Ix/Cli/CompileCmd.lean +++ b/Ix/Cli/CompileCmd.lean @@ -109,8 +109,8 @@ def runCompileCmd (p : Cli.Parsed) : IO UInt32 := do -- Persist the serialized IxonEnv (`Env::put` bytes) to disk so subsequent -- runs (e.g. `ix check-ixon`) can skip the Lean → IxOn compile step. The -- resulting file is the canonical streaming format produced by - -- `Ixon.Env::put` (see `src/ix/ixon/serialize.rs:1093-1297`); it round-trips - -- through `Ixon.Env::get`. + -- `Ixon.Env::put` (see `crates/ixon/src/serialize.rs` and the `.ixe` + -- layout spec in `docs/Ixon.md`); it round-trips through `Ixon.Env::get`. let writeStart ← IO.monoMsNow IO.FS.writeBinFile outPath bytes let writeMs := (← IO.monoMsNow) - writeStart diff --git a/Ix/Ixon.lean b/Ix/Ixon.lean index 0bf494ac..610c3ca5 100644 --- a/Ix/Ixon.lean +++ b/Ix/Ixon.lean @@ -1520,6 +1520,20 @@ structure Env where comms : Std.HashMap Address Comm := {} /-- Reverse index: constant Address → Ix.Name -/ addrToName : Std.HashMap Address Ix.Name := {} + /-- Distinguished root constant for bundle envs; `none` for whole + environments. A pointer, not a proof: readers check + `main ∈ consts`, and consumers holding an externally-expected + address must compare against it. -/ + main : Option Address := none + /-- Explicit trust boundary for thin bundles: addresses (constants + or blobs) the receiver is expected to already have. Serialized + as a strictly ascending leaf list; `Ix.Merkle.merkleRootCanonical` + over it reproduces the root a `Claim.assumptions` commits to. -/ + assumptions : Std.HashSet Address := {} + /-- Reducibility hints (§3, the canonical hint channel for anon/lazy + readers). When empty, `putEnv` derives the section from Named + `.defn` metadata at write time. Mirrors Rust `Env.anon_hints`. -/ + anonHints : Std.HashMap Address Lean.ReducibilityHints := {} deriving Inhabited namespace Env @@ -1627,13 +1641,21 @@ structure RawNameEntry where deriving Repr, Inhabited, BEq /-- Raw FFI environment structure using arrays instead of HashMaps. - This is the array-based equivalent of `Env` for FFI compatibility. -/ + This is the array-based equivalent of `Env` for FFI compatibility. + Field order matters: the Rust mirror (`LeanIxonRawEnv`) addresses + constructor slots positionally (0-7). -/ structure RawEnv where consts : Array RawConst named : Array RawNamed blobs : Array RawBlob comms : Array RawComm names : Array RawNameEntry := #[] + /-- Bundle root (`Env.main`). -/ + main : Option Address := none + /-- Bundle trust boundary (`Env.assumptions`), sorted ascending. -/ + assumptions : Array Address := #[] + /-- Explicit reducibility hints (`Env.anonHints`), sorted by address. -/ + anonHints : Array (Address × Lean.ReducibilityHints) := #[] deriving Repr, Inhabited, BEq namespace RawEnv @@ -1691,7 +1713,10 @@ def toEnv (raw : RawEnv) : Env := Id.run do env := { env with blobs := env.blobs.insert addr bytes } for ⟨addr, comm⟩ in raw.comms do env := env.storeComm addr comm - return env + return { env with + main := raw.main + assumptions := raw.assumptions.foldl (·.insert ·) {} + anonHints := raw.anonHints.foldl (fun m (a, h) => m.insert a h) {} } end RawEnv @@ -1700,7 +1725,9 @@ end RawEnv namespace Env /-- Convert Env with HashMaps to RawEnv with Arrays for FFI. - Includes the full names table for round-trip fidelity. -/ + Includes the full names table for round-trip fidelity. The + set-shaped bundle fields are sorted so the transfer is + deterministic (matches Rust `ixon_env_to_decoded`). -/ def toRawEnv (env : Env) : RawEnv := { consts := env.consts.toArray.map fun (addr, lc) => { addr, const := lc.get?.getD default } @@ -1708,6 +1735,11 @@ def toRawEnv (env : Env) : RawEnv := { blobs := env.blobs.toArray.map fun (addr, bytes) => { addr, bytes } comms := env.comms.toArray.map fun (addr, comm) => { addr, comm } names := env.names.toArray.map fun (addr, name) => { addr, name } + main := env.main + assumptions := env.assumptions.toList.toArray.qsort + fun a b => (compare a b).isLT + anonHints := env.anonHints.toList.toArray.qsort + fun a b => (compare a.1 b.1).isLT } /-- Tag4 flag for Env (0xE), variant 0. -/ @@ -1773,11 +1805,15 @@ partial def topologicalSortNames (names : Std.HashMap Address Ix.Name) : Array ( let visited := visited.insert addr let result := result.push (addr, name) (visited, result) - -- Start with anonymous already visited (it's implicit) + -- Include the anonymous name first so it gets index 0 in the name + -- index (arena nodes frequently reference it as a binder name). + -- Matches Rust `topological_sort_names`, which emits it explicitly — + -- required for byte-identical writer output across the mirrors. let initVisited : Std.HashSet Address := ({} : Std.HashSet Address).insert anonAddr + let initResult : Array (Address × Ix.Name) := #[(anonAddr, Ix.Name.mkAnon)] -- Sort names by address before iterating to ensure deterministic DFS order let sortedEntries := names.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT - let (_, result) := sortedEntries.foldl (init := (initVisited, #[])) fun (visited, result) (_, name) => + let (_, result) := sortedEntries.foldl (init := (initVisited, initResult)) fun (visited, result) (_, name) => visit name visited result result @@ -1795,6 +1831,19 @@ def putEnv (env : Env) : PutM Unit := do let root := (Ix.Merkle.merkleRootCanonical constAddrs).getD Ix.Merkle.zeroAddress Serialize.put root + -- Bundle header fields: main (Option, 0/1-tagged) + assumptions + -- (strictly ascending address list). Matches Rust `Env::put`. + match env.main with + | none => putU8 0 + | some addr => do + putU8 1 + Serialize.put addr + let assumptions := env.assumptions.toList.toArray.qsort + fun a b => (compare a b).isLT + putTag0 ⟨assumptions.size.toUInt64⟩ + for addr in assumptions do + Serialize.put addr + -- Section 1: Blobs (Address -> bytes) let blobs := env.blobs.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT putTag0 ⟨blobs.size.toUInt64⟩ @@ -1820,7 +1869,36 @@ def putEnv (env : Env) : PutM Unit := do putTag0 ⟨bytes.size.toUInt64⟩ putBytes bytes - -- Section 3: Names (Address -> Name component) + -- Section 3: anon_hints — the canonical hint channel for the + -- anon/lazy readers, placed before the metadata sections so they can + -- stop right after it. When the in-memory map is empty (the compile + -- path: hints live in Named metadata), the section is derived from + -- `named`. `named` is sorted here (the §5 canonical order, by name + -- hash) and reused for §5 below; the sorted iteration makes the + -- first-wins dedup by constant address deterministic. Matches Rust + -- `Env::put`. + let named := env.named.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + let hintPairs : Array (Address × Lean.ReducibilityHints) := + if env.anonHints.isEmpty then Id.run do + let mut seen : Std.HashSet Address := {} + let mut pairs : Array (Address × Lean.ReducibilityHints) := #[] + for (_, namedEntry) in named do + match namedEntry.constMeta with + | .defn _ _ hints _ _ _ _ _ => + if !seen.contains namedEntry.addr then + seen := seen.insert namedEntry.addr + pairs := pairs.push (namedEntry.addr, hints) + | _ => pure () + return pairs + else + env.anonHints.toList.toArray + let hintPairs := hintPairs.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨hintPairs.size.toUInt64⟩ + for (addr, hints) in hintPairs do + Serialize.put addr + putReducibilityHints hints + + -- Section 4: Names (Address -> Name component) -- Topologically sorted so parents come before children, with ties broken by address let sortedNames := topologicalSortNames env.names -- Build name index from sorted positions (matching Rust) @@ -1831,8 +1909,7 @@ def putEnv (env : Env) : PutM Unit := do Serialize.put addr putNameComponent name - -- Section 4: Named (name Address -> Named with metadata) - let named := env.named.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + -- Section 5: Named (name Address -> Named with metadata) putTag0 ⟨named.size.toUInt64⟩ for (name, namedEntry) in named do -- Use the name's stored hash, which matches how it was stored in env.names @@ -1847,7 +1924,7 @@ def putEnv (env : Env) : PutM Unit := do Serialize.put origAddr putConstantMetaIndexed origMeta nameIdx - -- Section 5: Comms (Address -> Comm) + -- Section 6: Comms (Address -> Comm) let comms := env.comms.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT putTag0 ⟨comms.size.toUInt64⟩ for (addr, comm) in comms do @@ -1872,28 +1949,68 @@ def getEnv : GetM Env := do -- the recomputed root. let storedRoot : Address ← Serialize.get - let mut env : Env := {} - - -- Section 1: Blobs + -- Bundle header fields: main (Option) + strictly ascending + -- assumptions list. A pre-bundle-format `.ixe` has the §1 blob + -- count here, so a bad tag most likely means a stale file. + let mainTag ← getU8 + let main : Option Address ← match mainTag with + | 0 => pure none + | 1 => some <$> Serialize.get + | x => throw s!"Env.get: invalid main tag {x} in bundle header — \ + possibly a pre-bundle-format .ixe; recompile it" + let numAssumptions := (← getTag0).size + let mut assumptionArr : Array Address := #[] + for _ in [:numAssumptions.toNat] do + let addr : Address ← Serialize.get + if let some prev := assumptionArr.back? then + if !(compare prev addr).isLT then + throw "Env.get: assumptions not strictly ascending" + assumptionArr := assumptionArr.push addr + + let mut env : Env := { + main + assumptions := assumptionArr.foldl (·.insert ·) {} + } + + -- Section 1: Blobs (hash-verified per entry: a swapped blob would + -- otherwise silently change a Nat/String literal's value — the + -- consts merkle root covers only constant addresses) let numBlobs := (← getTag0).size for _ in [:numBlobs.toNat] do let addr ← Serialize.get let len := (← getTag0).size let bytes ← getBytes len.toNat + if Address.blake3 bytes != addr then + throw s!"Env.get: blob bytes hash mismatch for {reprStr (toString addr)}" env := { env with blobs := env.blobs.insert addr bytes } - -- Section 2: Consts (length-prefixed; see putEnv for rationale) + -- Section 2: Consts (length-prefixed; see putEnv for rationale). + -- Per-entry integrity: bytes must hash to the stored address. let numConsts := (← getTag0).size for _ in [:numConsts.toNat] do let addr ← Serialize.get let len := (← getTag0).size let bytes ← getBytes len.toNat + if Address.blake3 bytes != addr then + throw s!"Env.get: const bytes hash mismatch for {reprStr (toString addr)}" match deConstant bytes with | .ok constant => env := env.storeConst addr constant | .error e => throw s!"Env.get: bad constant bytes for addr {reprStr (toString addr)}: {e}" - -- Section 3: Names (build lookup table AND reverse index) + -- `main` must reference a constant actually present in the file. + if let some m := main then + if !env.consts.contains m then + throw s!"Env.get: main {reprStr (toString m)} not present in consts" + + -- Section 3: anon_hints + let numHints := (← getTag0).size + for _ in [:numHints.toNat] do + let addr : Address ← Serialize.get + let hints ← getReducibilityHints + env := { env with anonHints := env.anonHints.insert addr hints } + + -- Section 4: Names (build lookup table AND reverse index) let numNames := (← getTag0).size let mut namesLookup : Std.HashMap Address Ix.Name := {} let mut nameRev : NameReverseIndex := #[] @@ -1906,7 +2023,7 @@ def getEnv : GetM Env := do namesLookup := namesLookup.insert addr name env := { env with names := env.names.insert addr name } - -- Section 4: Named (name Address -> Named with metadata) + -- Section 5: Named (name Address -> Named with metadata) let numNamed := (← getTag0).size for _ in [:numNamed.toNat] do let nameAddr ← Serialize.get @@ -1930,7 +2047,7 @@ def getEnv : GetM Env := do | none => throw s!"getEnv: named entry references unknown name address {reprStr (toString nameAddr)}" - -- Section 5: Comms + -- Section 6: Comms let numComms := (← getTag0).size for _ in [:numComms.toNat] do let addr ← Serialize.get (α := Address) @@ -1946,6 +2063,13 @@ def getEnv : GetM Env := do if computedRoot != storedRoot then throw "Env.get: merkle root mismatch" + -- Comms is the final section; trailing bytes are truncation damage + -- or concatenated garbage. (Mirrors Rust `Env::get`; the early-stop + -- readers cannot make this check by design.) + let st ← get + if st.idx != st.bytes.size then + throw s!"Env.get: {st.bytes.size - st.idx} trailing bytes after final section" + pure env end Env @@ -1956,8 +2080,9 @@ def serEnv (env : Env) : ByteArray := runPut (Env.putEnv env) /-- Deserialize an Env from bytes (full metadata, pure Lean). -/ def deEnv (bytes : ByteArray) : Except String Env := runGet Env.getEnv bytes -/-- Compute section sizes for debugging. Returns (blobs, consts, names, named, comms). -/ -def envSectionSizes (env : Env) : Nat × Nat × Nat × Nat × Nat := Id.run do +/-- Compute section sizes for debugging. + Returns (blobs, consts, anonHints, names, named, comms). -/ +def envSectionSizes (env : Env) : Nat × Nat × Nat × Nat × Nat × Nat := Id.run do -- Blobs section let blobsBytes := runPut do let blobs := env.blobs.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT @@ -1975,6 +2100,29 @@ def envSectionSizes (env : Env) : Nat × Nat × Nat × Nat × Nat := Id.run do Serialize.put addr putBytes lc.rawBytes + -- anon_hints section (mirrors putEnv's derive-from-named rule) + let hintsBytes := runPut do + let named := env.named.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + let hintPairs : Array (Address × Lean.ReducibilityHints) := + if env.anonHints.isEmpty then Id.run do + let mut seen : Std.HashSet Address := {} + let mut pairs : Array (Address × Lean.ReducibilityHints) := #[] + for (_, namedEntry) in named do + match namedEntry.constMeta with + | .defn _ _ hints _ _ _ _ _ => + if !seen.contains namedEntry.addr then + seen := seen.insert namedEntry.addr + pairs := pairs.push (namedEntry.addr, hints) + | _ => pure () + return pairs + else + env.anonHints.toList.toArray + let hintPairs := hintPairs.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨hintPairs.size.toUInt64⟩ + for (addr, hints) in hintPairs do + Serialize.put addr + putReducibilityHints hints + -- Names section let namesBytes := runPut do let sortedNames := Env.topologicalSortNames env.names @@ -2009,7 +2157,8 @@ def envSectionSizes (env : Env) : Nat × Nat × Nat × Nat × Nat := Id.run do Serialize.put addr putComm comm - (blobsBytes.size, constsBytes.size, namesBytes.size, namedBytes.size, commsBytes.size) + ( blobsBytes.size, constsBytes.size, hintsBytes.size, namesBytes.size, + namedBytes.size, commsBytes.size ) /-! ## Rust FFI Serialization -/ @@ -2056,11 +2205,16 @@ structure RawNamedLite where hintVal : UInt64 deriving Inhabited -/-- Metadata-light env returned by `rs_de_env_lazy`. -/ +/-- Metadata-light env returned by `rs_de_env_lazy`. Field order + matters: the Rust builder addresses constructor slots 0-4. -/ structure RawEnvLazy where consts : Array RawConstSlice named : Array RawNamedLite blobs : Array RawBlob + /-- Bundle root (`Env.main`) from the header. -/ + main : Option Address := none + /-- Bundle trust boundary (`Env.assumptions`), in header order. -/ + assumptions : Array Address := #[] deriving Inhabited /-- Decode an encoded reducibility hint into the `ConstantMeta` the check path @@ -2080,7 +2234,8 @@ def RawNamedLite.toConstMeta (n : RawNamedLite) : ConstantMeta := that share `buf`. `buf` MUST be the exact buffer passed to `rs_de_env_lazy` (offsets are relative to it). -/ def RawEnvLazy.toEnv (raw : RawEnvLazy) (buf : ByteArray) : Env := Id.run do - let mut env : Env := {} + let mut env : Env := { main := raw.main + assumptions := raw.assumptions.foldl (·.insert ·) {} } for ⟨addr, offset, len⟩ in raw.consts do env := { env with consts := env.consts.insert addr @@ -2104,9 +2259,10 @@ opaque rsDeEnvLazyFFI : @& ByteArray → Except String RawEnvLazy def deEnvAnon (bytes : ByteArray) : Except String Env := return (← rsDeEnvLazyFFI bytes).toEnv bytes -/-- Anonymous-only deserialization: keep blobs + consts, parse-and-drop - names/named/comms. Returns a `RawEnv` whose `named`/`names`/`comms` - arrays are empty. -/ +/-- Anonymous-only deserialization: keep blobs + consts + anon_hints + and stop — the metadata sections (names/named/comms) are laid out + after the hints and never touched. Returns a `RawEnv` whose + `named`/`names`/`comms` arrays are empty. -/ @[extern "rs_de_env_anon"] opaque rsDeEnvAnonFFI : @& ByteArray → Except String RawEnv diff --git a/Tests/FFI/Lifecycle.lean b/Tests/FFI/Lifecycle.lean index 79099038..d30554b7 100644 --- a/Tests/FFI/Lifecycle.lean +++ b/Tests/FFI/Lifecycle.lean @@ -137,10 +137,11 @@ def serdeTests : TestSeq := -- Empty RawEnv. Only data construction happens eagerly; FFI is deferred -- inside `mkSerdeRoundtripTest`. let empty : RawEnv := { consts := #[], named := #[], blobs := #[], comms := #[] } - -- RawEnv with data. The const's `addr` must be the canonical content - -- hash (`Address.blake3 (serConstant c)`) — the Rust loader verifies - -- `Address::hash(bytes) == addr` on load. Blobs and comms don't carry - -- a content-hash invariant, so `testAddr` is fine there. + -- RawEnv with data. Consts AND blobs must be content-addressed — the + -- Rust loader verifies `Address::hash(bytes) == addr` per entry for + -- both. `testAddr` is blake3 of `[1,2,3]`, which is exactly the blob's + -- bytes below, so the blob invariant holds; comms carry no + -- content-hash invariant, so `testAddr` is fine there too. let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) let testExpr : Expr := .sort 0 let testDef : Definition := { @@ -220,13 +221,12 @@ private def genSerdeRawEnv : Gen RawEnv := do let idx ← Gen.choose Nat 0 (pool.size - 1) let (addr, name) := pool[idx]! named := named.push { name, addr, constMeta := .empty } - -- Blobs: pool addresses + -- Blobs: content-addressed (the loaders verify blake3(bytes) == addr) let numBlobs ← Gen.choose Nat 0 3 let mut blobs : Array RawBlob := #[] for _ in [:numBlobs] do - let addr ← pickAddr let bytes ← Tests.Gen.Ixon.genByteArray - blobs := blobs.push { addr, bytes } + blobs := blobs.push { addr := Address.blake3 bytes, bytes } -- Comms: pool addresses let numComms ← Gen.choose Nat 0 2 let mut comms : Array RawComm := #[] diff --git a/Tests/Gen/Ixon.lean b/Tests/Gen/Ixon.lean index 4f767844..b001c037 100644 --- a/Tests/Gen/Ixon.lean +++ b/Tests/Gen/Ixon.lean @@ -457,9 +457,12 @@ def genRawConst : Gen RawConst := do def genRawNamed : Gen RawNamed := RawNamed.mk <$> genIxName 3 <*> genAddress <*> pure .empty -/-- Generate a RawBlob -/ -def genRawBlob : Gen RawBlob := - RawBlob.mk <$> genAddress <*> genByteArray +/-- Generate a RawBlob whose `addr` is the canonical content hash of + `bytes`. Readers verify `Address.blake3 bytes == addr` per entry + (blob-tampering defense), so arbitrary addresses are rejected. -/ +def genRawBlob : Gen RawBlob := do + let bytes ← genByteArray + pure { addr := Address.blake3 bytes, bytes } /-- Generate a RawComm -/ def genRawComm : Gen RawComm := @@ -469,13 +472,29 @@ def genRawComm : Gen RawComm := def genRawNameEntry : Gen RawNameEntry := RawNameEntry.mk <$> genAddress <*> genIxName 3 -/-- Generate a RawEnv with small arrays to avoid memory issues -/ -def genRawEnv : Gen RawEnv := - RawEnv.mk <$> genSmallArray genRawConst - <*> genSmallArray genRawNamed - <*> genSmallArray genRawBlob - <*> genSmallArray genRawComm - <*> genSmallArray genRawNameEntry +/-- Generate a RawEnv with small arrays to avoid memory issues. `main` + (when present) references a generated const — writers/readers check + `main ∈ consts`. Assumptions and hint keys are opaque addresses. -/ +def genRawEnv : Gen RawEnv := do + let consts ← genSmallArray genRawConst + let named ← genSmallArray genRawNamed + let blobs ← genSmallArray genRawBlob + let comms ← genSmallArray genRawComm + let names ← genSmallArray genRawNameEntry + let main ← + if consts.size > 0 then + frequency [ + (1, pure none), + (1, do + let i ← Gen.choose Nat 0 (consts.size - 1) + pure (consts[i % consts.size]?.map (·.addr))), + ] + else pure none + let assumptionList ← genList genAddress + let assumptions := assumptionList.toArray.qsort fun a b => (compare a b).isLT + let hintList ← genList (Prod.mk <$> genAddress <*> genReducibilityHints) + let anonHints := hintList.toArray.qsort fun a b => (compare a.1 b.1).isLT + pure { consts, named, blobs, comms, names, main, assumptions, anonHints } instance : Shrinkable RawConst where shrink rc := (fun c => { rc with const := c }) <$> Shrinkable.shrink rc.const @@ -486,18 +505,36 @@ instance : Shrinkable RawNamed where | _ => [{ rn with constMeta := .empty }] instance : Shrinkable RawBlob where - shrink rb := if rb.bytes.size > 0 then [{ rb with bytes := ByteArray.empty }] else [] + shrink rb := + -- Keep the content-address invariant: shrunk bytes need the + -- matching hash or the loaders reject the shrunk case. + if rb.bytes.size > 0 then + [{ addr := Address.blake3 ByteArray.empty, bytes := ByteArray.empty }] + else [] instance : Shrinkable RawComm where shrink _ := [] instance : Shrinkable RawEnv where shrink env := - (if env.consts.size > 0 then [{ env with consts := env.consts.pop }] else []) ++ + -- Popping a const may orphan `main` (writers reject main ∉ consts), + -- so clear it when its target is dropped. + (if env.consts.size > 0 then + let popped := env.consts.pop + let main := match env.main with + | some m => if popped.any (·.addr == m) then env.main else none + | none => none + [{ env with consts := popped, main }] + else []) ++ (if env.named.size > 0 then [{ env with named := env.named.pop }] else []) ++ (if env.blobs.size > 0 then [{ env with blobs := env.blobs.pop }] else []) ++ (if env.comms.size > 0 then [{ env with comms := env.comms.pop }] else []) ++ - (if env.names.size > 0 then [{ env with names := env.names.pop }] else []) + (if env.names.size > 0 then [{ env with names := env.names.pop }] else []) ++ + (if env.assumptions.size > 0 then [{ env with assumptions := env.assumptions.pop }] else []) ++ + (if env.anonHints.size > 0 then [{ env with anonHints := env.anonHints.pop }] else []) ++ + (match env.main with + | some _ => [{ env with main := none }] + | none => []) instance : SampleableExt RawConst := SampleableExt.mkSelfContained genRawConst instance : SampleableExt RawNamed := SampleableExt.mkSelfContained genRawNamed diff --git a/Tests/Ix/Compile.lean b/Tests/Ix/Compile.lean index f3d34e9c..7efdf000 100644 --- a/Tests/Ix/Compile.lean +++ b/Tests/Ix/Compile.lean @@ -337,8 +337,8 @@ def testCrossImpl : TestSeq := IO.println s!"[Step 4] Lean blob stats: total={fmtBytes leanTotalBlobData}, max={fmtBytes leanMaxBlob}, avg={fmtBytes leanAvgBlob}, big(>1kB)={leanBig}, huge(>100kB)={leanHuge}" IO.println s!"[Step 4] Lean top 10 blob sizes: {leanTopSizes.map fmtBytes}" - let (leanBlobs, leanConsts, leanNames, leanNamed, leanComms) := Ixon.envSectionSizes leanIxonEnv - IO.println s!"[Step 4] Lean sections: blobs={fmtBytes leanBlobs}, consts={fmtBytes leanConsts}, names={fmtBytes leanNames}, named={fmtBytes leanNamed}, comms={fmtBytes leanComms}" + let (leanBlobs, leanConsts, leanHints, leanNames, leanNamed, leanComms) := Ixon.envSectionSizes leanIxonEnv + IO.println s!"[Step 4] Lean sections: blobs={fmtBytes leanBlobs}, consts={fmtBytes leanConsts}, hints={fmtBytes leanHints}, names={fmtBytes leanNames}, named={fmtBytes leanNamed}, comms={fmtBytes leanComms}" let leanEnvBytes := serializeEnv leanIxonEnv IO.println s!"[Step 4] Lean env done: {fmtBytes leanEnvBytes.size}" @@ -358,8 +358,8 @@ def testCrossImpl : TestSeq := IO.println s!"[Step 4] Rust blob stats: total={fmtBytes rustTotalBlobData}, max={fmtBytes rustMaxBlob}, avg={fmtBytes rustAvgBlob}, big(>1kB)={rustBig}, huge(>100kB)={rustHuge}" IO.println s!"[Step 4] Rust top 10 blob sizes: {rustTopSizes.map fmtBytes}" - let (rustBlobs, rustConsts, rustNames, rustNamed, rustComms) := Ixon.envSectionSizes phases.compileEnv - IO.println s!"[Step 4] Rust sections: blobs={fmtBytes rustBlobs}, consts={fmtBytes rustConsts}, names={fmtBytes rustNames}, named={fmtBytes rustNamed}, comms={fmtBytes rustComms}" + let (rustBlobs, rustConsts, rustHints, rustNames, rustNamed, rustComms) := Ixon.envSectionSizes phases.compileEnv + IO.println s!"[Step 4] Rust sections: blobs={fmtBytes rustBlobs}, consts={fmtBytes rustConsts}, hints={fmtBytes rustHints}, names={fmtBytes rustNames}, named={fmtBytes rustNamed}, comms={fmtBytes rustComms}" let rustEnvBytes := serializeEnv phases.compileEnv IO.println s!"[Step 4] Rust env done: {fmtBytes rustEnvBytes.size}" let serTime := (← IO.monoMsNow) - serStart diff --git a/Tests/Ix/Ixon.lean b/Tests/Ix/Ixon.lean index 72d22ec4..db7394bd 100644 --- a/Tests/Ix/Ixon.lean +++ b/Tests/Ix/Ixon.lean @@ -153,9 +153,11 @@ def envSerdeUnit (env : Env) : Bool := def envUnitTests : TestSeq := -- Test 1: Empty env let emptyEnv : Env := {} - -- Test 2: Env with only a blob - let blobAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) - let envWithBlob : Env := { blobs := ({} : Std.HashMap _ _).insert blobAddr (ByteArray.mk #[4, 5, 6]) } + -- Test 2: Env with only a blob. Blob addresses must be the content + -- hash of the bytes — readers verify per entry. + let blobBytes := ByteArray.mk #[4, 5, 6] + let blobAddr := Address.blake3 blobBytes + let envWithBlob : Env := { blobs := ({} : Std.HashMap _ _).insert blobAddr blobBytes } -- Test 3: Env with a simple name (no named entry) let testName := Ix.Name.mkStr Ix.Name.mkAnon "test" let testNameAddr := testName.getHash @@ -179,15 +181,34 @@ def envUnitTests : TestSeq := let payloadAddr := Address.blake3 (ByteArray.mk #[13, 14, 15]) let commAddr := Address.blake3 (ByteArray.mk #[16, 17, 18]) let envWithBlobAndComm : Env := { - blobs := ({} : Std.HashMap _ _).insert blobAddr (ByteArray.mk #[4, 5, 6]), + blobs := ({} : Std.HashMap _ _).insert blobAddr blobBytes, comms := ({} : Std.HashMap _ _).insert commAddr (Comm.mk secretAddr payloadAddr) } + -- Test 7: Bundle env — main + assumptions + anonHints exercise the + -- bundle header and §3. + let bundleDef : Definition := { + kind := .defn, safety := .safe, lvls := 0, typ := .sort 0, value := .sort 0 + } + let bundleConst : Constant := { + info := .defn bundleDef, sharing := #[], refs := #[], univs := #[] + } + let bundleConstAddr := Address.blake3 (serConstant bundleConst) + let bundleEnv : Env := Id.run do + let mut env : Env := {} + env := env.storeConst bundleConstAddr bundleConst + return { env with + main := some bundleConstAddr + assumptions := ({} : Std.HashSet Address) + |>.insert (Address.blake3 (ByteArray.mk #[42])) + |>.insert (Address.blake3 (ByteArray.mk #[43])) + anonHints := ({} : Std.HashMap _ _).insert bundleConstAddr (.regular 5) } test "Empty env roundtrip" (envSerdeUnit emptyEnv) ++ test "Env with blob roundtrip" (envSerdeUnit envWithBlob) ++ test "Env with name roundtrip" (envSerdeUnit envWithName) ++ test "Env with named (empty meta) roundtrip" (envSerdeUnit envWithNamed) ++ test "Env with nested name roundtrip" (envSerdeUnit envWithNestedName) ++ - test "Env with blob+comm roundtrip" (envSerdeUnit envWithBlobAndComm) + test "Env with blob+comm roundtrip" (envSerdeUnit envWithBlobAndComm) ++ + test "Bundle env (main/assumptions/hints) roundtrip" (envSerdeUnit bundleEnv) /-! ## Cross-implementation serialization comparison tests -/ @@ -204,15 +225,22 @@ def envSerializationMatches (raw : RawEnv) : Bool := let env := raw.toEnv rsEqEnvSerialization raw (serEnv env) +/-- Strict byte equality between the pure-Lean writer and the Rust + writer over the same RawEnv (a stronger check than + `rsEqEnvSerialization`'s content comparison). -/ +def envBytesMatchRust (raw : RawEnv) : Bool := + serEnv raw.toEnv == rsSerEnvFFI raw + /-- Unit tests for Lean==Rust serialization comparison -/ def envSerializationUnitTests : TestSeq := -- Test 1: Empty env let emptyRaw : RawEnv := { consts := #[], named := #[], blobs := #[], comms := #[] } - -- Test 2: Env with one blob - let blobAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) + -- Test 2: Env with one blob (content-addressed: readers verify). + let blobBytes := ByteArray.mk #[4, 5, 6] + let blobAddr := Address.blake3 blobBytes let blobRaw : RawEnv := { consts := #[], named := #[], - blobs := #[{ addr := blobAddr, bytes := ByteArray.mk #[4, 5, 6] }], + blobs := #[{ addr := blobAddr, bytes := blobBytes }], comms := #[] } -- Test 3: Env with one comm @@ -227,13 +255,35 @@ def envSerializationUnitTests : TestSeq := -- Test 4: Env with blob + comm let blobCommRaw : RawEnv := { consts := #[], named := #[], - blobs := #[{ addr := blobAddr, bytes := ByteArray.mk #[4, 5, 6] }], + blobs := #[{ addr := blobAddr, bytes := blobBytes }], comms := #[{ addr := commAddr, comm := Comm.mk secretAddr payloadAddr }] } + -- Test 5: Bundle env — the directed cross-language vector for the + -- bundle header (main/assumptions) and §3 anon_hints. + let bundleDef : Definition := { + kind := .defn, safety := .safe, lvls := 0, typ := .sort 0, value := .sort 0 + } + let bundleConst : Constant := { + info := .defn bundleDef, sharing := #[], refs := #[], univs := #[] + } + let bundleConstAddr := Address.blake3 (serConstant bundleConst) + let bundleRaw : RawEnv := { + consts := #[{ addr := bundleConstAddr, const := bundleConst }], + named := #[], blobs := #[], comms := #[], + main := some bundleConstAddr, + assumptions := #[ + Address.blake3 (ByteArray.mk #[42]), + Address.blake3 (ByteArray.mk #[43])], + anonHints := #[(bundleConstAddr, .regular 5)] + } test "Empty env Lean==Rust" (envSerializationMatches emptyRaw) ++ test "Blob env Lean==Rust" (envSerializationMatches blobRaw) ++ test "Comm env Lean==Rust" (envSerializationMatches commRaw) ++ - test "Blob+Comm env Lean==Rust" (envSerializationMatches blobCommRaw) + test "Blob+Comm env Lean==Rust" (envSerializationMatches blobCommRaw) ++ + test "Bundle env Lean==Rust" (envSerializationMatches bundleRaw) ++ + test "Empty env bytes Lean==Rust" (envBytesMatchRust emptyRaw) ++ + test "Blob env bytes Lean==Rust" (envBytesMatchRust blobRaw) ++ + test "Bundle env bytes Lean==Rust" (envBytesMatchRust bundleRaw) /-! ## Canonical env merkle root: Lean vs. Rust agreement -/ diff --git a/crates/ffi/src/compile.rs b/crates/ffi/src/compile.rs index cf928511..4b28079a 100644 --- a/crates/ffi/src/compile.rs +++ b/crates/ffi/src/compile.rs @@ -485,6 +485,10 @@ pub extern "C" fn rs_compile_phases( raw_ixon_env.set_obj(2, blobs_arr); raw_ixon_env.set_obj(3, comms_arr); raw_ixon_env.set_obj(4, names_arr); + crate::lean_ixon::env::set_raw_env_bundle_fields( + &raw_ixon_env, + &compile_stt.env, + ); let result = LeanIxCompilePhases::alloc(0); result.set_obj(0, raw_env); @@ -581,6 +585,7 @@ pub extern "C" fn rs_compile_env_to_ixon( result.set_obj(2, blobs_arr); result.set_obj(3, comms_arr); result.set_obj(4, names_arr); + crate::lean_ixon::env::set_raw_env_bundle_fields(&result, &compile_stt.env); LeanIOResult::ok(result) } } diff --git a/crates/ffi/src/lean.rs b/crates/ffi/src/lean.rs index 0fc8d29e..cb920c9f 100644 --- a/crates/ffi/src/lean.rs +++ b/crates/ffi/src/lean.rs @@ -52,13 +52,15 @@ lean_ffi::lean_inductive! { LeanIxonRawBlob [ { num_obj: 2 } ]; LeanIxonRawComm [ { num_obj: 2 } ]; LeanIxonRawNameEntry [ { num_obj: 2 } ]; - LeanIxonRawEnv [ { num_obj: 5 } ]; + // consts, named, blobs, comms, names, main, assumptions, anonHints + LeanIxonRawEnv [ { num_obj: 8 } ]; // Lazy/anon deserialization (`rs_de_env_lazy`): zero-copy const windows - // (addr + offset + len), name->addr + hint, and copied blobs. + // (addr + offset + len), name->addr + hint, copied blobs, and the + // bundle header fields (main, assumptions). LeanIxonRawConstSlice [ { num_obj: 1, num_64: 2 } ]; LeanIxonRawNamedLite [ { num_obj: 2, num_64: 2 } ]; - LeanIxonRawEnvLazy [ { num_obj: 3 } ]; + LeanIxonRawEnvLazy [ { num_obj: 5 } ]; // --- Ixon multi-variant inductives --- diff --git a/crates/ffi/src/lean_ixon/env.rs b/crates/ffi/src/lean_ixon/env.rs index 398ada44..eddcd7ff 100644 --- a/crates/ffi/src/lean_ixon/env.rs +++ b/crates/ffi/src/lean_ixon/env.rs @@ -4,10 +4,10 @@ //! RawConst, RawNamed, RawBlob, RawComm. use crate::lean::{ - LeanIxName, LeanIxonComm, LeanIxonConstant, LeanIxonConstantMeta, - LeanIxonRawBlob, LeanIxonRawComm, LeanIxonRawConst, LeanIxonRawConstSlice, - LeanIxonRawEnv, LeanIxonRawEnvLazy, LeanIxonRawNameEntry, LeanIxonRawNamed, - LeanIxonRawNamedLite, + LeanIxName, LeanIxReducibilityHints, LeanIxonComm, LeanIxonConstant, + LeanIxonConstantMeta, LeanIxonRawBlob, LeanIxonRawComm, LeanIxonRawConst, + LeanIxonRawConstSlice, LeanIxonRawEnv, LeanIxonRawEnvLazy, + LeanIxonRawNameEntry, LeanIxonRawNamed, LeanIxonRawNamedLite, }; use ix_common::address::Address; use ix_common::env::{Name, ReducibilityHints}; @@ -17,7 +17,8 @@ use ixon::env::{Env as IxonEnv, LazyIndex, Named as IxonNamed}; use ixon::merkle::merkle_root_canonical; use ixon::metadata::ConstantMeta; use lean_ffi::object::{ - LeanArray, LeanBorrowed, LeanByteArray, LeanExcept, LeanOwned, LeanRef, + LeanArray, LeanBorrowed, LeanByteArray, LeanExcept, LeanOption, LeanOwned, + LeanProd, LeanRef, }; use crate::builder::LeanBuildCache; @@ -226,7 +227,7 @@ impl LeanIxonRawNameEntry { } // ============================================================================= -// RawEnv (consts, named, blobs, comms, names) +// RawEnv (consts, named, blobs, comms, names, main, assumptions, anonHints) // ============================================================================= /// Decoded Ixon.RawEnv @@ -236,6 +237,12 @@ pub struct DecodedRawEnv { pub blobs: Vec, pub comms: Vec, pub names: Vec, + /// Bundle root (`Env::main`). + pub main: Option
, + /// Bundle trust boundary (`Env::assumptions`), sorted ascending. + pub assumptions: Vec
, + /// Explicit reducibility hints (`Env::anon_hints`), sorted by address. + pub anon_hints: Vec<(Address, ReducibilityHints)>, } impl LeanIxonRawEnv { @@ -274,6 +281,27 @@ impl LeanIxonRawEnv { .set(i, LeanIxonRawNameEntry::build(&mut cache, &rn.addr, &rn.name)); } + // Build bundle fields: main (Option Address), assumptions + // (Array Address), anonHints (Array (Address × ReducibilityHints)). + let main_obj: LeanOwned = match &env.main { + None => LeanOption::none().into(), + Some(addr) => LeanOption::some(LeanIxAddress::build(addr)).into(), + }; + let assumptions_arr = LeanArray::alloc(env.assumptions.len()); + for (i, addr) in env.assumptions.iter().enumerate() { + assumptions_arr.set(i, LeanIxAddress::build(addr)); + } + let hints_arr = LeanArray::alloc(env.anon_hints.len()); + for (i, (addr, hint)) in env.anon_hints.iter().enumerate() { + hints_arr.set( + i, + LeanProd::new( + LeanIxAddress::build(addr), + LeanIxReducibilityHints::build(hint), + ), + ); + } + // Build RawEnv structure let ctor = LeanIxonRawEnv::alloc(0); ctor.set_obj(0, consts_arr); @@ -281,10 +309,52 @@ impl LeanIxonRawEnv { ctor.set_obj(2, blobs_arr); ctor.set_obj(3, comms_arr); ctor.set_obj(4, names_arr); + ctor.set_obj(5, main_obj); + ctor.set_obj(6, assumptions_arr); + ctor.set_obj(7, hints_arr); ctor } } +/// Populate the bundle-field constructor slots (5-7: main, +/// assumptions, anonHints) of a `RawEnv` being built field-by-field — +/// the compile FFI assembles its RawEnv manually instead of going +/// through [`LeanIxonRawEnv::build`], and leaving these slots unset is +/// uninitialized memory (segfault on GC). Set-shaped fields are sorted +/// for a deterministic transfer. +pub fn set_raw_env_bundle_fields( + ctor: &LeanIxonRawEnv, + env: &IxonEnv, +) { + let main_obj: LeanOwned = match &env.main { + None => LeanOption::none().into(), + Some(addr) => LeanOption::some(LeanIxAddress::build(addr)).into(), + }; + let mut assumptions: Vec
= + env.assumptions.iter().cloned().collect(); + assumptions.sort_unstable(); + let assumptions_arr = LeanArray::alloc(assumptions.len()); + for (i, addr) in assumptions.iter().enumerate() { + assumptions_arr.set(i, LeanIxAddress::build(addr)); + } + let mut hints: Vec<(Address, ReducibilityHints)> = + env.anon_hints.iter().map(|(a, h)| (a.clone(), *h)).collect(); + hints.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let hints_arr = LeanArray::alloc(hints.len()); + for (i, (addr, hint)) in hints.iter().enumerate() { + hints_arr.set( + i, + LeanProd::new( + LeanIxAddress::build(addr), + LeanIxReducibilityHints::build(hint), + ), + ); + } + ctor.set_obj(5, main_obj); + ctor.set_obj(6, assumptions_arr); + ctor.set_obj(7, hints_arr); +} + impl LeanIxonRawEnv { /// Decode Ixon.RawEnv from Lean pointer. pub fn decode(&self) -> DecodedRawEnv { @@ -294,6 +364,23 @@ impl LeanIxonRawEnv { let blobs_arr = ctor.get(2).as_array(); let comms_arr = ctor.get(3).as_array(); let names_arr = ctor.get(4).as_array(); + let main_obj = ctor.get(5); + let assumptions_arr = ctor.get(6).as_array(); + let hints_arr = ctor.get(7).as_array(); + + // `Option Address`: scalar-optimized none, or a tag-0/1 ctor. + let main: Option
= if main_obj.is_scalar() { + None + } else { + let opt = main_obj.as_ctor(); + match opt.tag() { + 0 => None, + 1 => Some( + LeanIxAddress::from_borrowed(opt.get(0).as_byte_array()).decode(), + ), + tag => panic!("Invalid Option tag for RawEnv.main: {tag}"), + } + }; DecodedRawEnv { consts: consts_arr @@ -304,6 +391,17 @@ impl LeanIxonRawEnv { comms: comms_arr.map(|x| LeanIxonRawComm::new(x.to_owned_ref()).decode()), names: names_arr .map(|x| LeanIxonRawNameEntry::new(x.to_owned_ref()).decode()), + main, + assumptions: assumptions_arr + .map(|x| LeanIxAddress::from_borrowed(x.as_byte_array()).decode()), + anon_hints: hints_arr.map(|x| { + let pair = x.as_ctor(); + let addr = + LeanIxAddress::from_borrowed(pair.get(0).as_byte_array()).decode(); + let hint = + LeanIxReducibilityHints::new(pair.get(1).to_owned_ref()).decode(); + (addr, hint) + }), } } } @@ -314,7 +412,7 @@ impl LeanIxonRawEnv { /// Reconstruct a Rust IxonEnv from a DecodedRawEnv. pub fn decoded_to_ixon_env(decoded: &DecodedRawEnv) -> IxonEnv { - let env = IxonEnv::new(); + let mut env = IxonEnv::new(); for rc in &decoded.consts { env.store_const(rc.addr.clone(), rc.constant.clone()); } @@ -331,6 +429,11 @@ pub fn decoded_to_ixon_env(decoded: &DecodedRawEnv) -> IxonEnv { for rc in &decoded.comms { env.store_comm(rc.addr.clone(), rc.comm.clone()); } + env.main = decoded.main.clone(); + env.assumptions.extend(decoded.assumptions.iter().cloned()); + env + .anon_hints + .extend(decoded.anon_hints.iter().map(|(a, h)| (a.clone(), *h))); env } @@ -380,7 +483,23 @@ pub fn ixon_env_to_decoded(env: &IxonEnv) -> Result { name: e.value().clone(), }) .collect(); - Ok(DecodedRawEnv { consts, named, blobs, comms, names }) + // Sort the set-shaped fields so the FFI transfer is deterministic. + let mut assumptions: Vec
= + env.assumptions.iter().cloned().collect(); + assumptions.sort_unstable(); + let mut anon_hints: Vec<(Address, ReducibilityHints)> = + env.anon_hints.iter().map(|(a, h)| (a.clone(), *h)).collect(); + anon_hints.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + Ok(DecodedRawEnv { + consts, + named, + blobs, + comms, + names, + main: env.main.clone(), + assumptions, + anon_hints, + }) } // ============================================================================= @@ -451,11 +570,12 @@ pub extern "C" fn rs_de_env( /// FFI: Anonymous-only deserialization (`Env::get_anon`). /// -/// Reads the header + blobs + consts sections; parses and discards -/// the metadata sections (names / named / comms). The returned -/// `RawEnv` has empty `named`, `names`, `comms` arrays. Useful for -/// anon-mode kernel callers that want to avoid the steady-state -/// memory cost of metadata that they will never consult. +/// Reads the header + §1 blobs + §2 consts + §3 anon_hints and stops; +/// the metadata sections (names / named / comms) are laid out after +/// the hints and never touched. The returned `RawEnv` has empty +/// `named`, `names`, `comms` arrays. Useful for anon-mode kernel +/// callers that want to avoid the steady-state memory cost of +/// metadata they will never consult. #[unsafe(no_mangle)] pub extern "C" fn rs_de_env_anon( obj: LeanByteArray>, @@ -552,10 +672,21 @@ fn build_raw_env_lazy(index: &LazyIndex) -> LeanIxonRawEnvLazy { blobs_arr.set(i, LeanIxonRawBlob::build_from_parts(addr, bytes)); } + let main_obj: LeanOwned = match &index.main { + None => LeanOption::none().into(), + Some(addr) => LeanOption::some(LeanIxAddress::build(addr)).into(), + }; + let assumptions_arr = LeanArray::alloc(index.assumptions.len()); + for (i, addr) in index.assumptions.iter().enumerate() { + assumptions_arr.set(i, LeanIxAddress::build(addr)); + } + let ctor = LeanIxonRawEnvLazy::alloc(0); ctor.set_obj(0, consts_arr); ctor.set_obj(1, named_arr); ctor.set_obj(2, blobs_arr); + ctor.set_obj(3, main_obj); + ctor.set_obj(4, assumptions_arr); ctor } diff --git a/crates/ffi/src/lean_ixon/serialize.rs b/crates/ffi/src/lean_ixon/serialize.rs index 68061c03..e681802a 100644 --- a/crates/ffi/src/lean_ixon/serialize.rs +++ b/crates/ffi/src/lean_ixon/serialize.rs @@ -234,6 +234,60 @@ pub extern "C" fn rs_eq_env_serialization( } } + // Bundle header fields. + if rust_env.main != decoded.main { + if debug { + eprintln!( + "[rs_eq_env_serialization] main mismatch: rust={:?}, decoded={:?}", + rust_env.main.as_ref().map(Address::hex), + decoded.main.as_ref().map(Address::hex), + ); + } + return false; + } + let decoded_assumptions: rustc_hash::FxHashSet
= + decoded.assumptions.iter().cloned().collect(); + if rust_env.assumptions != decoded_assumptions { + if debug { + eprintln!( + "[rs_eq_env_serialization] assumptions mismatch: rust={}, decoded={}", + rust_env.assumptions.len(), + decoded_assumptions.len(), + ); + } + return false; + } + + // Hints: the writers derive §3 from Named `Def` metadata when the + // explicit map is empty, so the parsed env's hints must equal the + // same derivation over the decoded RawEnv. + let expected_hints: rustc_hash::FxHashMap< + Address, + ix_common::env::ReducibilityHints, + > = if decoded.anon_hints.is_empty() { + let mut derived = rustc_hash::FxHashMap::default(); + for rn in &decoded.named { + if let ixon::metadata::ConstantMetaInfo::Def { hints, .. } = + &rn.const_meta.info + { + derived.entry(rn.addr.clone()).or_insert(*hints); + } + } + derived + } else { + decoded.anon_hints.iter().cloned().collect() + }; + if rust_env.anon_hints != expected_hints { + if debug { + eprintln!( + "[rs_eq_env_serialization] anon_hints mismatch: rust={}, expected={}", + rust_env.anon_hints.len(), + expected_hints.len(), + ); + } + return false; + } + true } diff --git a/crates/ixon/src/env.rs b/crates/ixon/src/env.rs index 425d9b97..c49b6d60 100644 --- a/crates/ixon/src/env.rs +++ b/crates/ixon/src/env.rs @@ -5,10 +5,13 @@ use std::collections::VecDeque; use std::sync::Arc; use ix_common::address::Address; -use ix_common::env::{Name, ReducibilityHints}; +use ix_common::env::{Name, NameData, ReducibilityHints}; use super::comm::Comm; -use super::constant::Constant; +use super::constant::{ + Constant, ConstantInfo, MutConst, ctor_proj_address, defn_proj_address, + indc_proj_address, recr_proj_address, +}; use super::lazy::LazyConstant; use super::map::IxonMap; use super::metadata::{ConstantMeta, ConstantMetaInfo}; @@ -92,6 +95,10 @@ pub struct LazyIndex { pub consts: Vec, pub named: Vec, pub blobs: Vec<(Address, Vec)>, + /// Bundle root (`Env::main`) as read from the header. + pub main: Option
, + /// Bundle trust boundary (`Env::assumptions`), in header (sorted) order. + pub assumptions: Vec
, } /// The Ixon environment. @@ -131,6 +138,18 @@ pub struct Env { /// supplying them in anon mode does not relax the kernel's /// metadata-free correctness model. pub anon_hints: FxHashMap, + /// Distinguished root constant for bundle envs; `None` for whole + /// environments. A pointer, not a proof: nothing in the file + /// authenticates it, so readers check `main ∈ consts` and consumers + /// holding an externally-expected address (from a Claim, a request) + /// must compare against it. + pub main: Option
, + /// Explicit trust boundary for thin bundles: addresses (constants + /// or blobs) the receiver is expected to already have, so the + /// closure of `main` need not be carried in full. Serialized as a + /// strictly ascending leaf list; `merkle_root_canonical` over it + /// reproduces the root a `Claim::assumptions` field commits to. + pub assumptions: FxHashSet
, } impl Env { @@ -142,6 +161,8 @@ impl Env { names: IxonMap::new(), comms: IxonMap::new(), anon_hints: FxHashMap::default(), + main: None, + assumptions: FxHashSet::default(), } } @@ -336,6 +357,280 @@ impl Env { v.sort_unstable(); v } + + /// Collect `c`'s outgoing closure edges: Expr-level `refs`, a + /// projection's structural `block` pointer, and — for a `Muts` + /// block at `addr` — every member/constructor projection address + /// (derived, not stored anywhere; `ingress_anon_block` computes + /// these and requires them present in the env). + fn closure_edges(addr: &Address, c: &Constant, edges: &mut Vec
) { + edges.extend(c.refs.iter().cloned()); + match &c.info { + ConstantInfo::IPrj(p) => edges.push(p.block.clone()), + ConstantInfo::CPrj(p) => edges.push(p.block.clone()), + ConstantInfo::RPrj(p) => edges.push(p.block.clone()), + ConstantInfo::DPrj(p) => edges.push(p.block.clone()), + ConstantInfo::Muts(members) => { + for (i, m) in members.iter().enumerate() { + let i = i as u64; + edges.push(match m { + MutConst::Defn(_) => defn_proj_address(i, addr), + MutConst::Indc(_) => indc_proj_address(i, addr), + MutConst::Recr(_) => recr_proj_address(i, addr), + }); + if let MutConst::Indc(ind) = m { + for cidx in 0..ind.ctors.len() as u64 { + edges.push(ctor_proj_address(i, cidx, addr)); + } + } + } + }, + _ => {}, + } + } + + /// BFS-collect the full dependency closure of `roots`, following + /// all three structural edge kinds (see [`Self::closure_edges`]): + /// + /// 1. `Constant.refs` — Expr-level references (constants and blobs); + /// 2. projection → its `Muts` block (`DPrj`/`IPrj`/`RPrj`/`CPrj` + /// carry `block` in the info payload and have EMPTY refs tables, + /// so a refs-only walk returns just the projection itself); + /// 3. `Muts` block → every member/constructor projection address. + /// + /// Compare [`Self::bfs_refs`], which follows only edge 1 and stays + /// the basis of claim assumption roots. The kernel's + /// `anon_work::closure_addrs` delegates here. + /// + /// Referenced addresses absent from `consts` (blobs and external + /// assumptions) appear in the returned set as leaves. + pub fn bfs_closure(&self, roots: &[Address]) -> FxHashSet
{ + let mut closure: FxHashSet
= FxHashSet::default(); + let mut queue: VecDeque
= VecDeque::new(); + for r in roots { + if closure.insert(r.clone()) { + queue.push_back(r.clone()); + } + } + while let Some(addr) = queue.pop_front() { + // Materialize just long enough to read edges; drop the map + // guard before recursing (see `bfs_refs`). + let constant = self.consts.get(&addr).and_then(|r| r.value().get().ok()); + let Some(c) = constant else { continue }; + let mut edges: Vec
= Vec::new(); + Self::closure_edges(&addr, &c, &mut edges); + for e in edges { + if closure.insert(e.clone()) { + queue.push_back(e); + } + } + } + closure + } + + /// Bundle completeness check (receiver side): `main` must be a + /// stored constant, and every address reachable from it via + /// [`Self::bfs_closure`] must be either carried by this env + /// (consts ∪ blobs) or declared in `assumptions`. + /// + /// This validates the VALUE pin. Display-metadata completeness + /// (names, `DataValue` blobs) is the writer's business — + /// [`Self::prune_to_closure`] carries it; a receiver that only + /// typechecks/evaluates never needs it. + pub fn validate_closed(&self) -> Result<(), String> { + let Some(main) = self.main.clone() else { + return Err("validate_closed: env has no main".to_string()); + }; + if self.consts.get(&main).is_none() { + return Err(format!( + "validate_closed: main {} not present in consts", + main.hex() + )); + } + for addr in self.bfs_closure(std::slice::from_ref(&main)) { + if self.consts.contains_key(&addr) + || self.blobs.contains_key(&addr) + || self.assumptions.contains(&addr) + { + continue; + } + return Err(format!( + "validate_closed: {} reachable from main but neither carried nor \ + assumed", + addr.hex() + )); + } + Ok(()) + } + + /// Build a self-contained bundle env: the 3-edge closure of `main`, + /// cut at `assumed`. + /// + /// - Reached constants are carried with their GENUINE bytes + /// (`store_const_lazy`), so the receiver's per-entry hash check + /// and consts merkle root hold; reached blobs are copied. + /// - Cut-points actually reached go to `out.assumptions` (minimal: + /// a declared-but-unreached assumption is not carried). + /// - `anon_hints` are copied per carried constant (else a bundle + /// regresses kernel-check time to the `Regular(0)` fallback). + /// - Display metadata is carried for every carried constant: its + /// `named` entries (all of them — alpha-equivalent names may + /// share one address), the name components they reference (with + /// full parent chains and string-component blobs), `DataValue` + /// payload blobs, `meta_refs` extension DAG edges, and aux_gen + /// `original` constants. Metadata can introduce new DAG edges, so + /// the walk runs to fixpoint (usually two rounds). + /// + /// Errors if a reached address is in neither `consts`, `blobs`, nor + /// `assumed` — the source env cannot produce a closed bundle for + /// `main` under that cut. + /// + /// Host-only — see `store_blob`. + #[cfg(not(target_arch = "riscv64"))] + pub fn prune_to_closure( + &self, + main: &Address, + assumed: &FxHashSet
, + ) -> Result { + if assumed.contains(main) { + return Err("prune_to_closure: main cannot be assumed".to_string()); + } + let mut out = Env::new(); + out.main = Some(main.clone()); + + let mut visited: FxHashSet
= FxHashSet::default(); + let mut pending: VecDeque
= VecDeque::new(); + visited.insert(main.clone()); + pending.push_back(main.clone()); + let mut named_done: FxHashSet = FxHashSet::default(); + + loop { + // ── Value pass: 3-edge BFS over pending roots, cut at `assumed`. + while let Some(addr) = pending.pop_front() { + if assumed.contains(&addr) { + out.assumptions.insert(addr); + continue; + } + if let Some(bytes) = self.get_const_bytes(&addr) { + out.store_const_lazy(addr.clone(), bytes); + if let Some(h) = self.anon_hints.get(&addr) { + out.anon_hints.insert(addr.clone(), *h); + } + let c = self.get_const(&addr).ok_or_else(|| { + format!("prune_to_closure: constant {} unparseable", addr.hex()) + })?; + let mut edges: Vec
= Vec::new(); + Self::closure_edges(&addr, &c, &mut edges); + for e in edges { + if visited.insert(e.clone()) { + pending.push_back(e); + } + } + } else if let Some(blob) = self.get_blob(&addr) { + out.blobs.insert(addr, blob); + } else { + return Err(format!( + "prune_to_closure: {} reachable from main but not in \ + consts/blobs and not assumed", + addr.hex() + )); + } + } + + // ── Named pass: carry display metadata for every carried + // constant. Metadata references content the value walk cannot + // see; new DAG edges feed the next value pass. + for entry in self.named.iter() { + let named = entry.value(); + if !out.consts.contains_key(&named.addr) + || named_done.contains(entry.key()) + { + continue; + } + named_done.insert(entry.key().clone()); + out.named.insert(entry.key().clone(), named.clone()); + self.carry_name(&mut out, entry.key()); + + let mut name_addrs: Vec
= Vec::new(); + let mut blob_addrs: Vec
= Vec::new(); + let mut dag_addrs: Vec
= Vec::new(); + named.meta.collect_deps( + &mut name_addrs, + &mut blob_addrs, + &mut dag_addrs, + ); + if let Some((orig_addr, orig_meta)) = &named.original { + dag_addrs.push(orig_addr.clone()); + orig_meta.collect_deps( + &mut name_addrs, + &mut blob_addrs, + &mut dag_addrs, + ); + } + for na in name_addrs { + let Some(name) = self.get_name(&na) else { + return Err(format!( + "prune_to_closure: metadata references name {} absent from \ + names", + na.hex() + )); + }; + self.carry_name(&mut out, &name); + } + for ba in blob_addrs { + let Some(blob) = self.get_blob(&ba) else { + return Err(format!( + "prune_to_closure: metadata references blob {} absent from \ + blobs", + ba.hex() + )); + }; + out.blobs.insert(ba, blob); + } + for da in dag_addrs { + if visited.insert(da.clone()) { + pending.push_back(da); + } + } + } + + // The named pass ran against the final consts of this round; if + // it produced no new DAG work, the walk is complete. + if pending.is_empty() { + break; + } + } + Ok(out) + } + + /// Copy `name` and its full parent chain into `out.names`, storing + /// each string component's bytes as a blob (the compiler's + /// convention — mirrors `addNameComponentsWithBlobs` on the Lean + /// side). Stops early once a component is already present: its + /// parents were carried with it. + #[cfg(not(target_arch = "riscv64"))] + fn carry_name(&self, out: &mut Env, name: &Name) { + let mut cur = name.clone(); + loop { + let addr = Address::from_blake3_hash(*cur.get_hash()); + if out.names.get(&addr).is_some() { + return; + } + out.names.insert(addr, cur.clone()); + let next = match cur.as_data() { + NameData::Anonymous(_) => None, + NameData::Str(parent, s, _) => { + out.store_blob(s.as_bytes().to_vec()); + Some(parent.clone()) + }, + NameData::Num(parent, _, _) => Some(parent.clone()), + }; + match next { + Some(p) => cur = p, + None => return, + } + } + } } impl Clone for Env { @@ -375,6 +670,8 @@ impl Clone for Env { names, comms, anon_hints: self.anon_hints.clone(), + main: self.main.clone(), + assumptions: self.assumptions.clone(), } } } @@ -692,4 +989,173 @@ mod tests { ); } } + + // --------------------------------------------------------------------------- + // bfs_closure / prune_to_closure / validate_closed + // --------------------------------------------------------------------------- + + fn defn_member(discriminator: u64) -> crate::constant::Definition { + use crate::constant::{DefKind, Definition}; + use ix_common::env::DefinitionSafety; + Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: discriminator, + typ: Arc::new(Expr::Sort(0)), + value: Arc::new(Expr::Var(0)), + } + } + + #[test] + fn bfs_closure_follows_projection_and_block_edges() { + use crate::constant::{ + MutConst, defn_proj_address, defn_proj_constant, + }; + let env = Env::new(); + let block_addr = store_canonical( + &env, + Constant::new(ConstantInfo::Muts(vec![ + MutConst::Defn(defn_member(0)), + MutConst::Defn(defn_member(1)), + ])), + ); + // Member projections live at derived addresses. + let p0_addr = store_canonical(&env, defn_proj_constant(0, block_addr.clone())); + assert_eq!(p0_addr, defn_proj_address(0, &block_addr)); + let p1_addr = store_canonical(&env, defn_proj_constant(1, block_addr.clone())); + + // From a projection root: the block and its sibling projections + // are reachable via the structural edges. + let closure = env.bfs_closure(std::slice::from_ref(&p0_addr)); + assert!(closure.contains(&p0_addr)); + assert!(closure.contains(&block_addr), "Prj → block edge"); + assert!(closure.contains(&p1_addr), "block → sibling projection edge"); + + // A refs-only walk sees none of this (projections have empty refs). + let refs_only = env.bfs_refs(&p0_addr); + assert_eq!(refs_only.len(), 1, "refs-only walk stops at the projection"); + } + + #[test] + fn prune_to_closure_carries_value_closure_and_blobs() { + let env = Env::new(); + let blob_addr = env.store_blob(b"forty two".to_vec()); + let c = store_canonical(&env, const_with_refs(vec![])); + let a = store_canonical( + &env, + const_with_refs(vec![blob_addr.clone(), c.clone()]), + ); + let d = store_canonical(&env, const_with_refs_discriminator(vec![], 7)); + + let bundle = env.prune_to_closure(&a, &FxHashSet::default()).unwrap(); + assert_eq!(bundle.main, Some(a.clone())); + assert!(bundle.consts.contains_key(&a)); + assert!(bundle.consts.contains_key(&c)); + assert!(!bundle.consts.contains_key(&d), "unreachable const not carried"); + assert_eq!(bundle.get_blob(&blob_addr), Some(b"forty two".to_vec())); + assert!(bundle.assumptions.is_empty()); + bundle.validate_closed().unwrap(); + + // Bundle survives a serialize → deserialize roundtrip closed + // (genuine bytes: per-entry hashes and the merkle root hold). + let mut buf = Vec::new(); + bundle.put(&mut buf).unwrap(); + let loaded = Env::get(&mut buf.as_slice()).unwrap(); + loaded.validate_closed().unwrap(); + assert_eq!(loaded.main, Some(a)); + } + + #[test] + fn prune_to_closure_cuts_at_assumed() { + let env = Env::new(); + let c = store_canonical(&env, const_with_refs(vec![])); + let b = store_canonical(&env, const_with_refs(vec![c.clone()])); + let a = store_canonical(&env, const_with_refs(vec![b.clone()])); + let assumed: FxHashSet
= [b.clone()].into_iter().collect(); + let bundle = env.prune_to_closure(&a, &assumed).unwrap(); + assert!(bundle.consts.contains_key(&a)); + assert!(!bundle.consts.contains_key(&b), "cut point not carried"); + assert!(!bundle.consts.contains_key(&c), "beyond the cut not carried"); + assert_eq!(bundle.assumptions, assumed, "minimal assumptions = reached cuts"); + bundle.validate_closed().unwrap(); + } + + #[test] + fn prune_to_closure_missing_dep_errors_unless_assumed() { + let env = Env::new(); + let ghost = Address::hash(b"ghost"); + let a = store_canonical(&env, const_with_refs(vec![ghost.clone()])); + let err = env.prune_to_closure(&a, &FxHashSet::default()).unwrap_err(); + assert!(err.contains("not in consts/blobs"), "got: {err}"); + let assumed: FxHashSet
= [ghost].into_iter().collect(); + let bundle = env.prune_to_closure(&a, &assumed).unwrap(); + bundle.validate_closed().unwrap(); + } + + #[test] + fn validate_closed_rejects_missing_blob_unless_assumed() { + let mut env = Env::new(); + let blob_addr = Address::hash(b"blob-bytes"); + let a = store_canonical(&env, const_with_refs(vec![blob_addr.clone()])); + env.main = Some(a); + let err = env.validate_closed().unwrap_err(); + assert!(err.contains("reachable from main"), "got: {err}"); + env.assumptions.insert(blob_addr); + env.validate_closed().unwrap(); + } + + #[test] + fn prune_to_closure_carries_named_metadata() { + use crate::metadata::{ConstantMetaInfo, ExprMeta}; + let env = Env::new(); + let a = store_canonical(&env, const_with_refs(vec![])); + // A blob referenced ONLY from metadata (meta_refs extension table) + // — invisible to the value walk. + let meta_blob = env.store_blob(b"callsite payload".to_vec()); + let name = n("Bundled"); + let name_addr = Address::from_blake3_hash(*name.get_hash()); + env.store_name(name_addr.clone(), name.clone()); + let mut meta = ConstantMeta::new(ConstantMetaInfo::Def { + name: name_addr.clone(), + lvls: vec![], + hints: ReducibilityHints::Regular(3), + all: vec![], + ctx: vec![], + arena: ExprMeta::default(), + type_root: 0, + value_root: 0, + }); + meta.meta_refs.push(meta_blob.clone()); + env.register_name( + name.clone(), + Named { addr: a.clone(), meta, original: None }, + ); + + let bundle = env.prune_to_closure(&a, &FxHashSet::default()).unwrap(); + assert!(bundle.named.get(&name).is_some(), "named entry carried"); + assert_eq!( + bundle.get_name(&name_addr), + Some(name.clone()), + "name component carried" + ); + assert_eq!( + bundle.get_blob(&meta_blob), + Some(b"callsite payload".to_vec()), + "meta_refs blob carried" + ); + // String-component blob carried too (compiler convention). + assert_eq!( + bundle.get_blob(&Address::hash(b"Bundled")), + Some(b"Bundled".to_vec()) + ); + // Hints derive from the carried Def meta on serialization; the + // anon reader sees them without touching metadata sections. + let mut buf = Vec::new(); + bundle.put(&mut buf).unwrap(); + let anon = Env::get_anon(&mut buf.as_slice()).unwrap(); + assert_eq!( + anon.anon_hints.get(&a), + Some(&ReducibilityHints::Regular(3)) + ); + } } diff --git a/crates/ixon/src/metadata.rs b/crates/ixon/src/metadata.rs index e7699267..6bd4cb7c 100644 --- a/crates/ixon/src/metadata.rs +++ b/crates/ixon/src/metadata.rs @@ -252,6 +252,105 @@ impl ConstantMeta { || !self.meta_univs.is_empty() } + /// Enumerate every external address this metadata references, + /// partitioned by the table that resolves it: + /// + /// - `names`: name-component addresses (resolved against + /// `Env.names`) — the variant's `name`/`lvls`/`all`/`ctx`/... + /// fields, arena binder/ref/proj/call-site names, and KVMap keys + /// plus `DataValue::OfName` payloads; + /// - `blobs`: raw-byte payload addresses (`DataValue` + /// strings/nats/ints/syntax, resolved against `Env.blobs`); + /// - `dag`: `meta_refs` extension-table addresses — constants or + /// blobs referenced by collapsed call-site argument expressions, + /// i.e. genuine value-DAG edges the primary `Constant.refs` walk + /// cannot see. + /// + /// Used by `Env::prune_to_closure` to carry a bundle's display + /// metadata completely. Duplicates are not filtered; callers dedup. + pub fn collect_deps( + &self, + names: &mut Vec
, + blobs: &mut Vec
, + dag: &mut Vec
, + ) { + use ConstantMetaInfo as I; + let mut arena: Option<&ExprMeta> = None; + match &self.info { + I::Empty => {}, + I::Def { name, lvls, all, ctx, arena: a, .. } => { + names.push(name.clone()); + names.extend(lvls.iter().cloned()); + names.extend(all.iter().cloned()); + names.extend(ctx.iter().cloned()); + arena = Some(a); + }, + I::Axio { name, lvls, arena: a, .. } + | I::Quot { name, lvls, arena: a, .. } => { + names.push(name.clone()); + names.extend(lvls.iter().cloned()); + arena = Some(a); + }, + I::Indc { name, lvls, ctors, all, ctx, arena: a, .. } => { + names.push(name.clone()); + names.extend(lvls.iter().cloned()); + names.extend(ctors.iter().cloned()); + names.extend(all.iter().cloned()); + names.extend(ctx.iter().cloned()); + arena = Some(a); + }, + I::Ctor { name, lvls, induct, arena: a, .. } => { + names.push(name.clone()); + names.extend(lvls.iter().cloned()); + names.push(induct.clone()); + arena = Some(a); + }, + I::Rec { name, lvls, rules, all, ctx, arena: a, .. } => { + names.push(name.clone()); + names.extend(lvls.iter().cloned()); + names.extend(rules.iter().cloned()); + names.extend(all.iter().cloned()); + names.extend(ctx.iter().cloned()); + arena = Some(a); + }, + I::Muts { all, .. } => { + for class in all { + names.extend(class.iter().cloned()); + } + }, + } + if let Some(a) = arena { + for node in &a.nodes { + match node { + ExprMetaData::Leaf | ExprMetaData::App { .. } => {}, + ExprMetaData::Binder { name, .. } + | ExprMetaData::LetBinder { name, .. } + | ExprMetaData::Ref { name } + | ExprMetaData::CallSite { name, .. } => names.push(name.clone()), + ExprMetaData::Prj { struct_name, .. } => { + names.push(struct_name.clone()); + }, + ExprMetaData::Mdata { mdata, .. } => { + for kv in mdata { + for (key, value) in kv { + names.push(key.clone()); + match value { + DataValue::OfName(a) => names.push(a.clone()), + DataValue::OfString(a) + | DataValue::OfNat(a) + | DataValue::OfInt(a) + | DataValue::OfSyntax(a) => blobs.push(a.clone()), + DataValue::OfBool(_) => {}, + } + } + } + }, + } + } + } + dag.extend(self.meta_refs.iter().cloned()); + } + /// Delegate indexed serialization to the inner enum, then serialize /// extension tables. pub fn put_indexed( diff --git a/crates/ixon/src/serialize.rs b/crates/ixon/src/serialize.rs index 2e2fd9b8..a6201999 100644 --- a/crates/ixon/src/serialize.rs +++ b/crates/ixon/src/serialize.rs @@ -76,25 +76,143 @@ fn put_address(a: &Address, buf: &mut Vec) { put_bytes(a.as_bytes(), buf); } -/// Read the optional trailing `anon_hints` section written by `Env::put`. -/// No-op when no bytes remain after the comms section — backward-compatible -/// with `.ixe` that have no hints or predate the section. Inserts into -/// `env.anon_hints` (overwriting any value harvested from a Named section, -/// with the identical value, so the order of harvest vs. section is moot). -fn read_anon_hints_section( +/// Write an `Option
` as `[0x00]` (None) or `[0x01][addr:32]` +/// (Some). Mirrors the Claim-side encoding in `proof.rs`. +fn put_opt_addr(opt: &Option
, buf: &mut Vec) { + match opt { + None => buf.push(0x00), + Some(addr) => { + buf.push(0x01); + buf.extend_from_slice(addr.as_bytes()); + }, + } +} + +fn get_opt_addr(buf: &mut &[u8]) -> Result, String> { + match get_u8(buf)? { + 0x00 => Ok(None), + 0x01 => Ok(Some(get_address(buf)?)), + b => Err(format!("get_opt_addr: invalid tag 0x{:02X}", b)), + } +} + +/// Read the `.ixe` header shared by every Env reader: `Tag4(0xE, 0)`, +/// the 32-byte consts merkle root, the bundle `main` pointer, and the +/// strictly ascending assumptions list. Centralized so the four readers +/// (`get`, `get_anon`, `get_anon_mmap`, `parse_lazy_index`) cannot +/// drift. `ctx` labels errors with the calling reader. +fn read_env_header( buf: &mut &[u8], - env: &mut Env, -) -> Result<(), String> { - if buf.is_empty() { - return Ok(()); + ctx: &str, +) -> Result<(Address, Option
, Vec
), String> { + let tag = Tag4::get(buf)?; + if tag.flag != Env::FLAG { + return Err(format!( + "{ctx}: expected flag 0x{:X}, got 0x{:X}", + Env::FLAG, + tag.flag + )); + } + if tag.size != 0 { + return Err(format!("{ctx}: expected Env variant 0, got {}", tag.size)); + } + let stored_root = get_address(buf)?; + // A pre-bundle-format `.ixe` has the §1 blob count here, so this + // byte is that count's Tag0 head — flag the likely cause when it + // isn't a valid opt tag. (.ixe files are regenerated artifacts.) + let main = get_opt_addr(buf).map_err(|e| { + format!( + "{ctx}: {e} in bundle header — possibly a pre-bundle-format .ixe; \ + recompile it" + ) + })?; + let n = get_u64(buf)? as usize; + // Each assumption is 32 bytes; a count the remaining buffer cannot + // hold is corruption (or a stale format) — reject before allocating. + if n > buf.len() / 32 { + return Err(format!( + "{ctx}: assumption count {n} exceeds remaining buffer — corrupt or \ + pre-bundle-format .ixe" + )); + } + let mut assumptions: Vec
= Vec::with_capacity(n); + for i in 0..n { + let addr = get_address(buf)?; + if let Some(prev) = assumptions.last() + && *prev >= addr + { + return Err(format!( + "{ctx}: assumptions not strictly ascending at idx {i} ({} then {})", + prev.hex(), + addr.hex() + )); + } + assumptions.push(addr); } - let n = get_u64(buf)?; + Ok((stored_root, main, assumptions)) +} + +/// Read the §1 blob section, verifying `Address::hash(bytes) == addr` +/// per entry. Without the check a swapped blob would silently change a +/// Nat/String literal's value under an otherwise-valid file (the consts +/// merkle root covers only const addresses). +fn read_blob_section( + buf: &mut &[u8], + ctx: &str, +) -> Result)>, String> { + let num_blobs = get_u64(buf)? as usize; + // Each blob entry needs at least addr (32) + a length byte. + if num_blobs > buf.len() / 33 { + return Err(format!( + "{ctx}: blob count {num_blobs} exceeds remaining buffer" + )); + } + let mut blobs = Vec::with_capacity(num_blobs); + for i in 0..num_blobs { + let addr = get_address(buf)?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "{ctx}: need {} bytes for blob, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + *buf = rest; + let computed = Address::hash(bytes); + if computed != addr { + return Err(format!( + "{ctx}: blob at idx {i} bytes hash to {} but stored under {}", + computed.hex(), + addr.hex() + )); + } + blobs.push((addr, bytes.to_vec())); + } + Ok(blobs) +} + +/// Read the §3 `anon_hints` section (unconditional — every writer +/// emits it, deriving the entries from Named metadata when the +/// in-memory map is empty; see `Env::put`). +fn read_hints_section( + buf: &mut &[u8], +) -> Result, String> { + let n = get_u64(buf)? as usize; + // Each hint entry needs at least addr (32) + one byte of hint. + if n > buf.len() / 33 { + return Err(format!( + "read_hints_section: hint count {n} exceeds remaining buffer" + )); + } + let mut hints = Vec::with_capacity(n); for _ in 0..n { let addr = get_address(buf)?; - let hints = ReducibilityHints::get_ser(buf)?; - env.anon_hints.insert(addr, hints); + let hint = ReducibilityHints::get_ser(buf)?; + hints.push((addr, hint)); } - Ok(()) + Ok(hints) } fn get_address(buf: &mut &[u8]) -> Result { @@ -1147,12 +1265,32 @@ impl Env { let root = merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); put_address(&root, buf); + // ───────────────────────────────────────────────────────────────────── + // Bundle header fields: distinguished root + assumed-present list + // (see `Env::main` / `Env::assumptions`). Writer-side sanity: a + // `main` outside `consts` would produce a file every reader + // rejects — fail here with the clearer message. + // ───────────────────────────────────────────────────────────────────── + if let Some(m) = &self.main + && self.consts.get(m).is_none() + { + return Err(format!("Env::put: main {} not present in consts", m.hex())); + } + put_opt_addr(&self.main, buf); + let mut assumption_addrs: Vec
= + self.assumptions.iter().cloned().collect(); + assumption_addrs.sort_unstable(); + put_u64(assumption_addrs.len() as u64, buf); + for addr in &assumption_addrs { + put_address(addr, buf); + } + // ───────────────────────────────────────────────────────────────────── // Section 1: Blobs (Address -> bytes) // ───────────────────────────────────────────────────────────────────── let sec_start = std::time::Instant::now(); if !quiet { - eprintln!("[Env::put] section 1/5 blobs: {} entries", self.blobs.len(),); + eprintln!("[Env::put] section 1/6 blobs: {} entries", self.blobs.len(),); } let mut blob_addrs: Vec
= self.blobs.iter().map(|e| e.key().clone()).collect(); @@ -1171,7 +1309,7 @@ impl Env { } if !quiet { eprintln!( - "[Env::put] section 1/5 blobs done in {:.1}s ({} bytes so far)", + "[Env::put] section 1/6 blobs done in {:.1}s ({} bytes so far)", sec_start.elapsed().as_secs_f64(), buf.len(), ); @@ -1185,11 +1323,11 @@ impl Env { // ───────────────────────────────────────────────────────────────────── let sec_start = std::time::Instant::now(); if !quiet { - eprintln!("[Env::put] section 2/5 consts: {} entries", self.consts.len(),); + eprintln!("[Env::put] section 2/6 consts: {} entries", self.consts.len(),); } if !quiet { eprintln!( - "[Env::put] section 2/5 consts: collected+sorted in {:.1}s, \ + "[Env::put] section 2/6 consts: collected+sorted in {:.1}s, \ streaming put...", sec_start.elapsed().as_secs_f64(), ); @@ -1210,7 +1348,7 @@ impl Env { } if !quiet { eprintln!( - "[Env::put] section 2/5 consts done: put in {:.1}s, total {:.1}s \ + "[Env::put] section 2/6 consts done: put in {:.1}s, total {:.1}s \ ({} bytes so far)", put_start.elapsed().as_secs_f64(), sec_start.elapsed().as_secs_f64(), @@ -1219,7 +1357,67 @@ impl Env { } // ───────────────────────────────────────────────────────────────────── - // Section 3: Names (Address -> Name component, topologically sorted) + // Section 3: anon_hints (Address -> ReducibilityHints) + // + // The canonical hint channel for the anon/lazy readers, placed + // before the metadata sections so `get_anon`/`get_anon_mmap` can + // stop right after it (they never touch names/named/comms). When + // the in-memory map is empty (the compile path: hints live in + // Named metadata), the section is derived from `named` — the same + // harvest `get_anon` historically did at read time, moved to + // write time. Hints are performance-only advice and intentionally + // NOT covered by the consts merkle root. + // + // `named_keys` is collected and sorted here (by name hash — the + // Section 5 canonical order) and reused for Section 5 below; the + // sorted iteration makes the first-wins dedup by constant address + // deterministic when alpha-equivalent names share one constant. + // ───────────────────────────────────────────────────────────────────── + let sec_start = std::time::Instant::now(); + let mut named_keys: Vec = + self.named.iter().map(|e| e.key().clone()).collect(); + #[cfg(not(target_arch = "riscv64"))] + named_keys.par_sort_unstable_by(|a, b| { + a.get_hash().as_bytes().cmp(b.get_hash().as_bytes()) + }); + #[cfg(target_arch = "riscv64")] + named_keys.sort_unstable_by(|a, b| { + a.get_hash().as_bytes().cmp(b.get_hash().as_bytes()) + }); + let mut hint_pairs: Vec<(Address, ReducibilityHints)> = + if self.anon_hints.is_empty() { + let mut derived: FxHashMap = + FxHashMap::default(); + for name in &named_keys { + if let Some(entry) = self.named.get(name) + && let super::metadata::ConstantMetaInfo::Def { hints, .. } = + &entry.value().meta.info + { + derived.entry(entry.value().addr.clone()).or_insert(*hints); + } + } + derived.into_iter().collect() + } else { + self.anon_hints.iter().map(|(a, h)| (a.clone(), *h)).collect() + }; + hint_pairs.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + put_u64(hint_pairs.len() as u64, buf); + for (addr, hints) in &hint_pairs { + put_address(addr, buf); + hints.put_ser(buf); + } + if !quiet { + eprintln!( + "[Env::put] section 3/6 anon_hints done: {} entries in {:.1}s \ + ({} bytes so far)", + hint_pairs.len(), + sec_start.elapsed().as_secs_f64(), + buf.len(), + ); + } + + // ───────────────────────────────────────────────────────────────────── + // Section 4: Names (Address -> Name component, topologically sorted) // ───────────────────────────────────────────────────────────────────── // Topological sort ensures parents come before children so the name // index assigned during serialization is valid for all references that @@ -1228,14 +1426,14 @@ impl Env { let sec_start = std::time::Instant::now(); if !quiet { eprintln!( - "[Env::put] section 3/5 names: topo-sorting {} entries", + "[Env::put] section 4/6 names: topo-sorting {} entries", self.names.len(), ); } let sorted_names = topological_sort_names(&self.names); if !quiet { eprintln!( - "[Env::put] section 3/5 names: topo-sorted in {:.1}s, serializing...", + "[Env::put] section 4/6 names: topo-sorted in {:.1}s, serializing...", sec_start.elapsed().as_secs_f64(), ); } @@ -1249,7 +1447,7 @@ impl Env { } if !quiet { eprintln!( - "[Env::put] section 3/5 names done: put in {:.1}s, total {:.1}s \ + "[Env::put] section 4/6 names done: put in {:.1}s, total {:.1}s \ ({} bytes so far)", put_start.elapsed().as_secs_f64(), sec_start.elapsed().as_secs_f64(), @@ -1258,37 +1456,18 @@ impl Env { } // ───────────────────────────────────────────────────────────────────── - // Section 4: Named (Name -> Named metadata with indexed addresses) + // Section 5: Named (Name -> Named metadata with indexed addresses) // ───────────────────────────────────────────────────────────────────── // Named values are the *largest* per-entry (each carries a ConstantMeta // with metadata arenas), so the streaming pattern's win is greatest // here: on Mathlib, avoiding the clone-into-Vec saves ~30 GB peak RAM. // - // Key clone cost: a `Name` is `Arc`, so each clone is a - // single atomic refcount increment (<1s for 733k). + // `named_keys` was collected and sorted (by cached name hash bytes) + // up in Section 3, where the hint derivation needs the same + // canonical order. let sec_start = std::time::Instant::now(); if !quiet { - eprintln!("[Env::put] section 4/5 named: {} entries", self.named.len(),); - } - let mut named_keys: Vec = - self.named.iter().map(|e| e.key().clone()).collect(); - // Sort by the cached name hash bytes (same key used by every existing - // Section 4 ordering guarantee). `par_sort_unstable_by` uses rayon to - // parallelize the compare across all cores. - #[cfg(not(target_arch = "riscv64"))] - named_keys.par_sort_unstable_by(|a, b| { - a.get_hash().as_bytes().cmp(b.get_hash().as_bytes()) - }); - #[cfg(target_arch = "riscv64")] - named_keys.sort_unstable_by(|a, b| { - a.get_hash().as_bytes().cmp(b.get_hash().as_bytes()) - }); - if !quiet { - eprintln!( - "[Env::put] section 4/5 named: collected+sorted in {:.1}s, \ - streaming put...", - sec_start.elapsed().as_secs_f64(), - ); + eprintln!("[Env::put] section 5/6 named: {} entries", self.named.len(),); } let put_start = std::time::Instant::now(); put_u64(named_keys.len() as u64, buf); @@ -1300,7 +1479,7 @@ impl Env { } if !quiet { eprintln!( - "[Env::put] section 4/5 named done: put in {:.1}s, total {:.1}s \ + "[Env::put] section 5/6 named done: put in {:.1}s, total {:.1}s \ ({} bytes so far)", put_start.elapsed().as_secs_f64(), sec_start.elapsed().as_secs_f64(), @@ -1309,11 +1488,11 @@ impl Env { } // ───────────────────────────────────────────────────────────────────── - // Section 5: Comms (Address -> Comm) — typically empty on compile path + // Section 6: Comms (Address -> Comm) — typically empty on compile path // ───────────────────────────────────────────────────────────────────── let sec_start = std::time::Instant::now(); if !quiet { - eprintln!("[Env::put] section 5/5 comms: {} entries", self.comms.len(),); + eprintln!("[Env::put] section 6/6 comms: {} entries", self.comms.len(),); } let mut comm_addrs: Vec
= self.comms.iter().map(|e| e.key().clone()).collect(); @@ -1330,33 +1509,12 @@ impl Env { } if !quiet { eprintln!( - "[Env::put] section 5/5 comms done in {:.1}s ({} bytes so far)", + "[Env::put] section 6/6 comms done in {:.1}s ({} bytes so far)", sec_start.elapsed().as_secs_f64(), buf.len(), ); } - // Optional trailing section: anon_hints (Address -> ReducibilityHints). - // `get_anon` normally HARVESTS hints from the Named section; a closure - // sub-env (built for shard injection) DROPS that section, which would - // force the kernel to `Regular(0)` and add a def-eq overhead vs whole-env - // proving. Carrying the hints here lets the guest reproduce vanilla kernel - // behavior exactly. Written only when populated, so compiler-produced - // `.ixe` (empty map) are byte-identical to before; loaders read it only if - // bytes remain after comms. Hints are performance-only (the `Regular(0)` - // fallback is always correct), so this section is intentionally NOT covered - // by the consts merkle root. - if !self.anon_hints.is_empty() { - let mut hint_addrs: Vec
= - self.anon_hints.keys().cloned().collect(); - hint_addrs.sort_unstable(); - put_u64(hint_addrs.len() as u64, buf); - for addr in &hint_addrs { - put_address(addr, buf); - self.anon_hints[addr].put_ser(buf); - } - } - if !quiet { eprintln!( "[Env::put] ALL DONE: {} bytes in {:.1}s", @@ -1368,46 +1526,25 @@ impl Env { } /// Deserialize an Env from bytes. + /// + /// The whole buffer must be consumed — trailing bytes after the + /// final section are rejected (truncation/concatenation guard). The + /// early-stop readers (`get_anon`, `get_anon_mmap`, + /// `parse_lazy_index`) cannot make that check by design. pub fn get(buf: &mut &[u8]) -> Result { - // Header - let tag = Tag4::get(buf)?; - if tag.flag != Self::FLAG { - return Err(format!( - "Env::get: expected flag 0x{:X}, got 0x{:X}", - Self::FLAG, - tag.flag - )); - } - if tag.size != 0 { - return Err(format!( - "Env::get: expected Env variant 0, got {}", - tag.size - )); - } - - // Canonical merkle root (fixed 32 bytes). For empty const sets the - // stored value is `zero_address()`. Verified against the - // recomputed value at the end of deserialization. - let stored_root = get_address(buf)?; + // Header: tag + stored merkle root (verified at the end against + // the recomputed root; empty const sets store `zero_address()`) + + // bundle fields. + let (stored_root, main, assumptions) = read_env_header(buf, "Env::get")?; #[cfg_attr(not(target_arch = "riscv64"), allow(unused_mut))] let mut env = Env::new(); + env.main = main; + env.assumptions = assumptions.into_iter().collect(); - // Section 1: Blobs - let num_blobs = get_u64(buf)?; - for _ in 0..num_blobs { - let addr = get_address(buf)?; - let len = get_u64(buf)? as usize; - if buf.len() < len { - return Err(format!( - "Env::get: need {} bytes for blob, have {}", - len, - buf.len() - )); - } - let (bytes, rest) = buf.split_at(len); - *buf = rest; - env.blobs.insert(addr, bytes.to_vec()); + // Section 1: Blobs (hash-verified per entry) + for (addr, bytes) in read_blob_section(buf, "Env::get")? { + env.blobs.insert(addr, bytes); } // Section 2: Consts (lazy: read length prefix, slice bytes, defer parse) @@ -1443,7 +1580,22 @@ impl Env { .insert(addr, crate::lazy::LazyConstant::from_bytes(bytes.into())); } - // Section 3: Names (build lookup table and reverse index for metadata) + // `main` must reference a constant actually present in the file. + if let Some(m) = &env.main + && env.consts.get(m).is_none() + { + return Err(format!( + "Env::get: main {} not present in consts", + m.hex() + )); + } + + // Section 3: anon_hints + for (addr, hints) in read_hints_section(buf)? { + env.anon_hints.insert(addr, hints); + } + + // Section 4: Names (build lookup table and reverse index for metadata) let num_names = get_u64(buf)?; let mut names_lookup: FxHashMap = FxHashMap::default(); let mut name_reverse_index: NameReverseIndex = @@ -1462,7 +1614,7 @@ impl Env { env.names.insert(addr, name); } - // Section 4: Named (use indexed deserialization for metadata) + // Section 5: Named (use indexed deserialization for metadata) let num_named = get_u64(buf)?; for _ in 0..num_named { let name_addr = get_address(buf)?; @@ -1473,7 +1625,7 @@ impl Env { env.named.insert(name, named); } - // Section 5: Comms + // Section 6: Comms let num_comms = get_u64(buf)?; for _ in 0..num_comms { let addr = get_address(buf)?; @@ -1481,9 +1633,6 @@ impl Env { env.comms.insert(addr, comm); } - // Optional trailing anon_hints section (see `Env::put`). - read_anon_hints_section(buf, &mut env)?; - // Verify the stored merkle root matches what we'd compute from // env.consts. Empty const set → expected = zero_address(). // Rejects any tampering with the header. @@ -1500,6 +1649,15 @@ impl Env { )); } + // Comms is the final section; anything after it is truncation + // damage or concatenated garbage. + if !buf.is_empty() { + return Err(format!( + "Env::get: {} trailing bytes after final section", + buf.len() + )); + } + Ok(env) } @@ -1520,41 +1678,15 @@ impl Env { pub fn parse_lazy_index(data: &[u8]) -> Result { let mut buf: &[u8] = data; - // Header: Tag4 (flag/variant) + canonical merkle root. - let tag = Tag4::get(&mut buf)?; - if tag.flag != Self::FLAG { - return Err(format!( - "parse_lazy_index: expected flag 0x{:X}, got 0x{:X}", - Self::FLAG, - tag.flag - )); - } - if tag.size != 0 { - return Err(format!( - "parse_lazy_index: expected Env variant 0, got {}", - tag.size - )); - } - let stored_root = get_address(&mut buf)?; + // Header: tag + merkle root + bundle fields. + let (stored_root, main, assumptions) = + read_env_header(&mut buf, "parse_lazy_index")?; - let mut index = LazyIndex::default(); + let mut index = LazyIndex { main, assumptions, ..LazyIndex::default() }; - // Section 1: Blobs (copied — small, and the kernel ingests their bytes). - let num_blobs = get_u64(&mut buf)?; - for _ in 0..num_blobs { - let addr = get_address(&mut buf)?; - let len = get_u64(&mut buf)? as usize; - if buf.len() < len { - return Err(format!( - "parse_lazy_index: need {} bytes for blob, have {}", - len, - buf.len() - )); - } - let (bytes, rest) = buf.split_at(len); - buf = rest; - index.blobs.push((addr, bytes.to_vec())); - } + // Section 1: Blobs (copied — small, and the kernel ingests their + // bytes; hash-verified per entry). + index.blobs = read_blob_section(&mut buf, "parse_lazy_index")?; // Section 2: Consts — record offset windows, never parse the bodies. let num_consts = get_u64(&mut buf)?; @@ -1574,7 +1706,13 @@ impl Env { index.consts.push(LazyConstSlice { addr, offset, len }); } - // Section 3: Names — parsed to build the index for metadata decoding, then + // Section 3: anon_hints — the canonical hint channel (the writer + // always emits it, deriving from Named metadata when needed), so + // `LazyNamed.hint` is a plain lookup instead of a metadata harvest. + let hints_map: FxHashMap = + read_hints_section(&mut buf)?.into_iter().collect(); + + // Section 4: Names — parsed to build the index for metadata decoding, then // dropped (the check never consults the name component table). let num_names = get_u64(&mut buf)?; let mut names_lookup: FxHashMap = FxHashMap::default(); @@ -1589,8 +1727,9 @@ impl Env { names_lookup.insert(addr, name); } - // Section 4: Named — keep `name → addr` and the Defn reducibility hint; the - // full ConstantMeta (arena, CallSite, ...) is parsed then discarded. + // Section 5: Named — keep `name → addr` plus the §3 hint for that + // address; the full ConstantMeta (arena, CallSite, ...) is parsed + // then discarded. let num_named = get_u64(&mut buf)?; for _ in 0..num_named { let name_addr = get_address(&mut buf)?; @@ -1598,14 +1737,11 @@ impl Env { let name = names_lookup.get(&name_addr).cloned().ok_or_else(|| { format!("parse_lazy_index: missing name for addr {:?}", name_addr) })?; - let hint = match &named.meta.info { - super::metadata::ConstantMetaInfo::Def { hints, .. } => Some(*hints), - _ => None, - }; + let hint = hints_map.get(&named.addr).copied(); index.named.push(LazyNamed { name, addr: named.addr, hint }); } - // Section 5 (comms) is skipped: not needed by the check path. + // Section 6 (comms) is never reached: not needed by the check path. // Re-verify the merkle root over const addresses (header integrity). let mut const_addrs: Vec
= @@ -1621,63 +1757,50 @@ impl Env { )); } + // `main` must reference a constant present in the file. + if let Some(m) = &index.main + && const_addrs.binary_search(m).is_err() + { + return Err(format!( + "parse_lazy_index: main {} not present in consts", + m.hex() + )); + } + Ok(index) } - /// Anonymous-only deserialization: read the header + blobs + - /// consts sections, parse-and-discard the metadata sections - /// (names / named / comms). + /// Anonymous-only deserialization: read the header + §1 blobs + + /// §2 consts + §3 anon_hints, then STOP — the metadata sections + /// (§4 names / §5 named / §6 comms) are laid out after the hints + /// precisely so this reader never has to touch them. /// - /// Returns an `Env` with populated `consts` (lazy) and `blobs`, and - /// **empty** `named` / `names` / `comms`. The merkle-root header is - /// re-verified against the recomputed root over `consts.keys()`, - /// exactly as in [`Env::get`]. + /// Returns an `Env` with populated `consts` (lazy), `blobs`, and + /// `anon_hints`, and **empty** `named` / `names` / `comms`. The + /// merkle-root header is re-verified against the recomputed root + /// over `consts.keys()`, exactly as in [`Env::get`]. Because the + /// cursor stops at §3, trailing-buffer checks are impossible here + /// by design — only the full [`Env::get`] enforces EOF. /// - /// Why "parse and discard"? Sections 3-5 lack a section-level length - /// prefix today (only section 2 has one), so we can't byte-skip - /// them without parsing. Parsing into local scopes that drop on - /// return still wins us the steady-state memory: the returned `Env` - /// is metadata-free, and the temporary lookup tables / parsed - /// metadata values are reclaimed before this function returns. + /// Hints come straight from §3: the writer always emits that + /// section, deriving it from Named metadata when the in-memory map + /// is empty, so the historical read-time harvest from §Named is + /// gone. /// /// Used by the anon-mode kernel path so a verifier holding only - /// content addresses doesn't pay the long-term cost of metadata - /// sections it will never consult. + /// content addresses doesn't pay for metadata it will never consult. pub fn get_anon(buf: &mut &[u8]) -> Result { // Header (same as Env::get) - let tag = Tag4::get(buf)?; - if tag.flag != Self::FLAG { - return Err(format!( - "Env::get_anon: expected flag 0x{:X}, got 0x{:X}", - Self::FLAG, - tag.flag - )); - } - if tag.size != 0 { - return Err(format!( - "Env::get_anon: expected Env variant 0, got {}", - tag.size - )); - } - let stored_root = get_address(buf)?; + let (stored_root, main, assumptions) = + read_env_header(buf, "Env::get_anon")?; let mut env = Env::new(); + env.main = main; + env.assumptions = assumptions.into_iter().collect(); - // Section 1: Blobs (kept) - let num_blobs = get_u64(buf)?; - for _ in 0..num_blobs { - let addr = get_address(buf)?; - let len = get_u64(buf)? as usize; - if buf.len() < len { - return Err(format!( - "Env::get_anon: need {} bytes for blob, have {}", - len, - buf.len() - )); - } - let (bytes, rest) = buf.split_at(len); - *buf = rest; - env.blobs.insert(addr, bytes.to_vec()); + // Section 1: Blobs (kept; hash-verified per entry) + for (addr, bytes) in read_blob_section(buf, "Env::get_anon")? { + env.blobs.insert(addr, bytes); } // Section 2: Consts (kept, lazy) @@ -1707,55 +1830,27 @@ impl Env { .insert(addr, crate::lazy::LazyConstant::from_bytes(bytes.into())); } - // Section 3: Names — parse and DISCARD. We still need a populated - // `names_lookup` and `name_reverse_index` so section 4's indexed - // metadata parses correctly, but both go out of scope before - // returning so the steady-state `Env` carries no name data. - let num_names = get_u64(buf)?; - let mut names_lookup: FxHashMap = FxHashMap::default(); - let mut name_reverse_index: NameReverseIndex = - Vec::with_capacity(num_names as usize + 1); - let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); - names_lookup.insert(anon_addr, Name::anon()); - for _ in 0..num_names { - let addr = get_address(buf)?; - let name = get_name_component(buf, &names_lookup)?; - name_reverse_index.push(addr.clone()); - names_lookup.insert(addr, name); - } - - // Section 4: Named — parse and mostly discard, but harvest - // `ReducibilityHints` from each `Def` variant into `env.anon_hints`. - // Hints are performance advice (lazy-delta tiebreak); the kernel's - // anon-mode correctness model is preserved either way. Without - // them, every Definition is forced to `Regular(0)` and the kernel - // can chew through `MAX_WHNF_FUEL` on definitions Lean would have - // marked `Abbrev`/`Regular(h)`. - let num_named = get_u64(buf)?; - for _ in 0..num_named { - let _name_addr = get_address(buf)?; - let named = get_named_indexed(buf, &name_reverse_index)?; - if let super::metadata::ConstantMetaInfo::Def { hints, .. } = - &named.meta.info - { - env.anon_hints.insert(named.addr.clone(), *hints); - } + // `main` must reference a constant actually present in the file. + if let Some(m) = &env.main + && env.consts.get(m).is_none() + { + return Err(format!( + "Env::get_anon: main {} not present in consts", + m.hex() + )); } - // Section 5: Comms — parse and DISCARD. - let num_comms = get_u64(buf)?; - for _ in 0..num_comms { - let _addr = get_address(buf)?; - let _comm = Comm::get(buf)?; + // Section 3: anon_hints. Hints are performance advice (lazy-delta + // tiebreak); the kernel's anon-mode correctness model is preserved + // either way. Without them, every Definition is forced to + // `Regular(0)` and the kernel can chew through `MAX_WHNF_FUEL` on + // definitions Lean would have marked `Abbrev`/`Regular(h)`. + for (addr, hints) in read_hints_section(buf)? { + env.anon_hints.insert(addr, hints); } - // Optional trailing anon_hints section (see `Env::put`). For a closure - // sub-env this carries the hints the dropped Named section would have, so - // the kernel reproduces vanilla behavior with no def-eq overhead. - read_anon_hints_section(buf, &mut env)?; - - drop(names_lookup); - drop(name_reverse_index); + // Sections 4-6 (names / named / comms) are laid out after the + // hints precisely so this reader can stop here. // Verify merkle root over loaded consts. let mut const_addrs: Vec
= @@ -1785,11 +1880,10 @@ impl Env { /// the mapping stays alive as long as any consumer holds the env or /// any clone of a `LazyConstant` from it. /// - /// Sections 1 (blobs), 3 (names), 4 (named), and 5 (comms) are - /// handled the same way as `get_anon`: blobs are heap-copied (they - /// are small and consumed eagerly), names and named are - /// parse-and-discard (with hints harvested into `env.anon_hints`), - /// comms are skipped. + /// Sections are handled the same way as `get_anon`: §1 blobs are + /// heap-copied and hash-verified (small, eagerly consumed), §3 + /// anon_hints is read into `env.anon_hints`, and the reader stops + /// there — the metadata sections (§4-§6) are never touched. /// /// On Linux, the kernel's adaptive readahead handles the linear /// scan during section parsing efficiently; subsequent random @@ -1846,39 +1940,17 @@ impl Env { let mut buf: &[u8] = mmap_full; // Header (same shape as Env::get_anon) - let tag = Tag4::get(&mut buf)?; - if tag.flag != Self::FLAG { - return Err(format!( - "Env::get_anon_mmap: expected flag 0x{:X}, got 0x{:X}", - Self::FLAG, - tag.flag - )); - } - if tag.size != 0 { - return Err(format!( - "Env::get_anon_mmap: expected Env variant 0, got {}", - tag.size - )); - } - let stored_root = get_address(&mut buf)?; + let (stored_root, main, assumptions) = + read_env_header(&mut buf, "Env::get_anon_mmap")?; let mut env = Env::new(); + env.main = main; + env.assumptions = assumptions.into_iter().collect(); - // Section 1: Blobs (heap-copied; small, eagerly consumed) - let num_blobs = get_u64(&mut buf)?; - for _ in 0..num_blobs { - let addr = get_address(&mut buf)?; - let len = get_u64(&mut buf)? as usize; - if buf.len() < len { - return Err(format!( - "Env::get_anon_mmap: need {} bytes for blob, have {}", - len, - buf.len() - )); - } - let (bytes, rest) = buf.split_at(len); - buf = rest; - env.blobs.insert(addr, bytes.to_vec()); + // Section 1: Blobs (heap-copied; small, eagerly consumed; + // hash-verified per entry) + for (addr, bytes) in read_blob_section(&mut buf, "Env::get_anon_mmap")? { + env.blobs.insert(addr, bytes); } // Section 2: Consts (mmap-backed lazy windows) @@ -1916,47 +1988,23 @@ impl Env { buf = &buf[len..]; } - // Section 3: Names — parse and DISCARD (needed transiently so - // section 4's indexed metadata can be decoded). - let num_names = get_u64(&mut buf)?; - let mut names_lookup: FxHashMap = FxHashMap::default(); - let mut name_reverse_index: NameReverseIndex = - Vec::with_capacity(num_names as usize + 1); - let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); - names_lookup.insert(anon_addr, Name::anon()); - for _ in 0..num_names { - let addr = get_address(&mut buf)?; - let name = get_name_component(&mut buf, &names_lookup)?; - name_reverse_index.push(addr.clone()); - names_lookup.insert(addr, name); - } - - // Section 4: Named — harvest `ReducibilityHints` from `Def` - // entries into `env.anon_hints`; discard the rest. See `get_anon` - // for the rationale. - let num_named = get_u64(&mut buf)?; - for _ in 0..num_named { - let _name_addr = get_address(&mut buf)?; - let named = get_named_indexed(&mut buf, &name_reverse_index)?; - if let super::metadata::ConstantMetaInfo::Def { hints, .. } = - &named.meta.info - { - env.anon_hints.insert(named.addr.clone(), *hints); - } + // `main` must reference a constant actually present in the file. + if let Some(m) = &env.main + && env.consts.get(m).is_none() + { + return Err(format!( + "Env::get_anon_mmap: main {} not present in consts", + m.hex() + )); } - // Section 5: Comms — parse and DISCARD. - let num_comms = get_u64(&mut buf)?; - for _ in 0..num_comms { - let _addr = get_address(&mut buf)?; - let _comm = Comm::get(&mut buf)?; + // Section 3: anon_hints — see `get_anon` for the rationale. + for (addr, hints) in read_hints_section(&mut buf)? { + env.anon_hints.insert(addr, hints); } - // Optional trailing anon_hints section (see `Env::put`). - read_anon_hints_section(&mut buf, &mut env)?; - - drop(names_lookup); - drop(name_reverse_index); + // Sections 4-6 (names / named / comms) are laid out after the + // hints precisely so this reader can stop here. // Verify merkle root over loaded consts (same as get_anon). let mut const_addrs: Vec
= @@ -1982,20 +2030,29 @@ impl Env { Ok(buf.len()) } - /// Calculate serialized size with breakdown by section. + /// Calculate serialized size with breakdown by section: + /// `(header, blobs, consts, anon_hints, names, named, comms)`. pub fn serialized_size_breakdown( &self, - ) -> Result<(usize, usize, usize, usize, usize, usize), String> { + ) -> Result<(usize, usize, usize, usize, usize, usize, usize), String> { let mut buf = Vec::new(); - // Header + merkle root (matches Env::put layout; root is always - // 32 bytes, with `zero_address()` as the empty-env sentinel). + // Header: tag + merkle root (32 bytes, `zero_address()` sentinel + // for empty const sets) + bundle fields (matches Env::put layout). Tag4::new(Self::FLAG, 0).put(&mut buf); let mut const_addrs: Vec
= self.consts.iter().map(|e| e.key().clone()).collect(); const_addrs.sort_unstable(); let root = merkle_root_canonical(&const_addrs).unwrap_or_else(zero_address); put_address(&root, &mut buf); + put_opt_addr(&self.main, &mut buf); + let mut assumption_addrs: Vec
= + self.assumptions.iter().cloned().collect(); + assumption_addrs.sort_unstable(); + put_u64(assumption_addrs.len() as u64, &mut buf); + for addr in &assumption_addrs { + put_address(addr, &mut buf); + } let header_size = buf.len(); // Section 1: Blobs @@ -2018,7 +2075,32 @@ impl Env { } let consts_size = buf.len() - before_consts; - // Section 3: Names (also build name index) + // Section 3: anon_hints (mirrors Env::put's derive-from-named rule) + let before_hints = buf.len(); + let mut hint_pairs: Vec<(Address, ReducibilityHints)> = + if self.anon_hints.is_empty() { + let mut derived: FxHashMap = + FxHashMap::default(); + for entry in self.named.iter() { + if let super::metadata::ConstantMetaInfo::Def { hints, .. } = + &entry.value().meta.info + { + derived.entry(entry.value().addr.clone()).or_insert(*hints); + } + } + derived.into_iter().collect() + } else { + self.anon_hints.iter().map(|(a, h)| (a.clone(), *h)).collect() + }; + hint_pairs.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + put_u64(hint_pairs.len() as u64, &mut buf); + for (addr, hints) in &hint_pairs { + put_address(addr, &mut buf); + hints.put_ser(&mut buf); + } + let hints_size = buf.len() - before_hints; + + // Section 4: Names (also build name index) let before_names = buf.len(); let sorted_names = topological_sort_names(&self.names); let mut name_index: NameIndex = NameIndex::new(); @@ -2030,7 +2112,7 @@ impl Env { } let names_size = buf.len() - before_names; - // Section 4: Named (use indexed serialization) + // Section 5: Named (use indexed serialization) let before_named = buf.len(); put_u64(self.named.len() as u64, &mut buf); for entry in self.named.iter() { @@ -2039,7 +2121,7 @@ impl Env { } let named_size = buf.len() - before_named; - // Section 5: Comms + // Section 6: Comms let before_comms = buf.len(); put_u64(self.comms.len() as u64, &mut buf); for entry in self.comms.iter() { @@ -2052,6 +2134,7 @@ impl Env { header_size, blobs_size, consts_size, + hints_size, names_size, named_size, comms_size, @@ -2224,7 +2307,7 @@ mod tests { } fn gen_env(g: &mut Gen) -> Env { - let env = Env::new(); + let mut env = Env::new(); // Generate blobs let num_blobs = gen_range(g, 0..10); @@ -2246,12 +2329,14 @@ mod tests { // Generate constants and named entries let num_consts = gen_range(g, 0..10); + let mut const_addrs: Vec
= Vec::new(); for i in 0..num_consts { let constant = gen_constant(g); let mut buf = Vec::new(); constant.put(&mut buf); let addr = Address::hash(&buf); env.store_const(addr.clone(), constant); + const_addrs.push(addr.clone()); // Create a named entry for this constant if !names.is_empty() { @@ -2284,6 +2369,34 @@ mod tests { env.comms.insert(addr, comm); } + // Bundle fields. `main` must reference a stored const — `Env::put` + // validates that; `assumptions` are opaque addresses (no content + // behind them is required), so random ones exercise the encoding. + if !const_addrs.is_empty() && bool::arbitrary(g) { + let idx = usize::arbitrary(g) % const_addrs.len(); + env.main = Some(const_addrs[idx].clone()); + } + let num_assumptions = gen_range(g, 0..5); + for _ in 0..num_assumptions { + env.assumptions.insert(Address::arbitrary(g)); + } + + // Explicit anon_hints (keyed by arbitrary addresses — the map is + // advisory). When left empty, `Env::put` derives §3 from Named + // metadata instead; gen_env metas are all `Empty`, so that + // derivation contributes nothing and roundtrip equality holds + // either way. + let num_hints = gen_range(g, 0..4); + for _ in 0..num_hints { + let variant: u8 = Arbitrary::arbitrary(g); + let hint = match variant % 3 { + 0 => ReducibilityHints::Opaque, + 1 => ReducibilityHints::Abbrev, + _ => ReducibilityHints::Regular(Arbitrary::arbitrary(g)), + }; + env.anon_hints.insert(Address::arbitrary(g), hint); + } + env } @@ -2382,6 +2495,32 @@ mod tests { } } + // Bundle fields + hints (gen_env metas are all Empty, so §3 + // derivation contributes nothing and plain equality holds). + if env.main != recovered.main { + eprintln!( + "main mismatch: {:?} vs {:?}", + env.main, recovered.main + ); + return false; + } + if env.assumptions != recovered.assumptions { + eprintln!( + "assumptions mismatch: {} vs {} entries", + env.assumptions.len(), + recovered.assumptions.len() + ); + return false; + } + if env.anon_hints != recovered.anon_hints { + eprintln!( + "anon_hints mismatch: {} vs {} entries", + env.anon_hints.len(), + recovered.anon_hints.len() + ); + return false; + } + true }, Err(e) => { @@ -2519,21 +2658,39 @@ mod tests { assert!(res.is_err(), "tampered root should be rejected"); } + /// Byte offset of the first constant's payload within a serialized + /// env buffer, computed by walking the header + §1 (blobs) + the §2 + /// prefix with the real parsers — so tampering tests track layout + /// changes instead of hardcoding offsets. + fn first_const_payload_offset(buf: &[u8]) -> usize { + let mut cur: &[u8] = buf; + read_env_header(&mut cur, "test").unwrap(); + read_blob_section(&mut cur, "test").unwrap(); + let n = get_u64(&mut cur).unwrap(); + assert!(n >= 1, "need at least one const to locate"); + let _addr = get_address(&mut cur).unwrap(); + let _len = Tag0::get(&mut cur).unwrap(); + buf.len() - cur.len() + } + + /// Byte offset of the first blob's payload (same technique as + /// [`first_const_payload_offset`]). + fn first_blob_payload_offset(buf: &[u8]) -> usize { + let mut cur: &[u8] = buf; + read_env_header(&mut cur, "test").unwrap(); + let n = get_u64(&mut cur).unwrap(); + assert!(n >= 1, "need at least one blob to locate"); + let _addr = get_address(&mut cur).unwrap(); + let _len = get_u64(&mut cur).unwrap(); + buf.len() - cur.len() + } + /// Flip a byte inside the first const's payload bytes (not its /// stored address): merkle still validates over `consts.keys()`, so /// the per-entry `Address::hash(bytes) == addr` check is what must /// reject this corruption. Without that check, `Env::get` would /// succeed and the failure would surface much later inside /// `LazyConstant::get` with a misleading parse error. - /// - /// Header layout for an env with empty blobs and one const: - /// [0] Tag4 (0xE0) - /// [1..33) merkle root (32 bytes) - /// [33] Section 1 (blobs) count = 0 (Tag0) - /// [34] Section 2 (consts) count = 1 (Tag0) - /// [35..67) const address (32 bytes) - /// [67] Tag0 length of const bytes - /// [68..] const bytes (target for tampering) #[test] fn env_const_bytes_tampering_rejected_by_get() { let env = Env::new(); @@ -2541,7 +2698,7 @@ mod tests { let mut buf = Vec::new(); env.put(&mut buf).unwrap(); // Flip a byte well inside the const payload. - let off = 68 + 3; + let off = first_const_payload_offset(&buf) + 3; assert!(off < buf.len(), "expected const bytes at offset {off}"); buf[off] ^= 0xFF; let res = Env::get(&mut buf.as_slice()); @@ -2558,7 +2715,7 @@ mod tests { env.store_const(Address::hash(b"a"), defn_const(vec![])); let mut buf = Vec::new(); env.put(&mut buf).unwrap(); - let off = 68 + 3; + let off = first_const_payload_offset(&buf) + 3; assert!(off < buf.len()); buf[off] ^= 0xFF; let res = Env::get_anon(&mut buf.as_slice()); @@ -2576,7 +2733,7 @@ mod tests { env.store_const(Address::hash(b"a"), defn_const(vec![])); let mut buf = Vec::new(); env.put(&mut buf).unwrap(); - let off = 68 + 3; + let off = first_const_payload_offset(&buf) + 3; assert!(off < buf.len()); buf[off] ^= 0xFF; // mmap requires a real file @@ -2721,4 +2878,190 @@ mod tests { assert_eq!(pre_a, post_a, "`a` content should be stable across unlink"); assert_ne!(post_a, post_b, "discriminators should still differentiate"); } + + // --------------------------------------------------------------------------- + // Bundle header fields (main / assumptions) + §1/§3 integrity + // --------------------------------------------------------------------------- + + #[test] + fn env_main_and_assumptions_roundtrip_all_readers() { + let mut env = Env::new(); + let a = store_canonical(&env, defn_const(vec![])); + store_canonical(&env, defn_const_discriminator(vec![], 1)); + env.main = Some(a.clone()); + env.assumptions.insert(Address::hash(b"assume-1")); + env.assumptions.insert(Address::hash(b"assume-2")); + + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + + let full = Env::get(&mut buf.as_slice()).unwrap(); + assert_eq!(full.main, Some(a.clone())); + assert_eq!(full.assumptions, env.assumptions); + + let anon = Env::get_anon(&mut buf.as_slice()).unwrap(); + assert_eq!(anon.main, Some(a.clone())); + assert_eq!(anon.assumptions, env.assumptions); + + let index = Env::parse_lazy_index(&buf).unwrap(); + assert_eq!(index.main, Some(a)); + let mut sorted: Vec
= env.assumptions.iter().cloned().collect(); + sorted.sort_unstable(); + assert_eq!(index.assumptions, sorted, "LazyIndex keeps header order"); + } + + #[test] + fn env_put_rejects_main_not_in_consts() { + let mut env = Env::new(); + store_canonical(&env, defn_const(vec![])); + env.main = Some(Address::hash(b"not-a-const")); + let mut buf = Vec::new(); + let err = env + .put(&mut buf) + .expect_err("main outside consts must be rejected at write time"); + assert!(err.contains("main"), "got: {err}"); + } + + /// Flip a byte of the serialized `main` address: the header parses, + /// but the resulting address is (w.h.p.) not a stored constant, so + /// the `main ∈ consts` reader check must reject the file. This test + /// intentionally knows the fixed header prefix (tag byte + 32-byte + /// root — same knowledge as `parse_stored_root`). + #[test] + fn env_get_rejects_tampered_main() { + let mut env = Env::new(); + let a = store_canonical(&env, defn_const(vec![])); + env.main = Some(a); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + assert_eq!(buf[33], 0x01, "main opt-flag should be Some"); + buf[34] ^= 0xFF; + let err = Env::get(&mut buf.as_slice()) + .expect_err("tampered main must be rejected"); + assert!(err.contains("main"), "got: {err}"); + } + + #[test] + fn env_get_rejects_unsorted_assumptions() { + let mut env = Env::new(); + store_canonical(&env, defn_const(vec![])); + env.assumptions.insert(Address::hash(b"x")); + env.assumptions.insert(Address::hash(b"y")); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + // Locate the header end with the real parser, then swap the two + // 32-byte assumption addresses in place (the writer sorts them). + let header_len = { + let mut cur: &[u8] = &buf; + read_env_header(&mut cur, "test").unwrap(); + buf.len() - cur.len() + }; + let (lo, hi) = (header_len - 64, header_len - 32); + let first: Vec = buf[lo..hi].to_vec(); + let second: Vec = buf[hi..header_len].to_vec(); + buf[lo..hi].copy_from_slice(&second); + buf[hi..header_len].copy_from_slice(&first); + let err = Env::get(&mut buf.as_slice()) + .expect_err("descending assumptions must be rejected"); + assert!(err.contains("strictly ascending"), "got: {err}"); + } + + #[test] + fn env_blob_tampering_rejected_by_all_readers() { + use std::io::Write; + let env = Env::new(); + store_canonical(&env, defn_const(vec![])); + env.store_blob(b"blob payload".to_vec()); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let off = first_blob_payload_offset(&buf) + 2; + buf[off] ^= 0xFF; + + for (reader, res) in [ + ("get", Env::get(&mut buf.as_slice()).err()), + ("get_anon", Env::get_anon(&mut buf.as_slice()).err()), + ("parse_lazy_index", Env::parse_lazy_index(&buf).err()), + ] { + let err = + res.unwrap_or_else(|| panic!("{reader}: tampered blob accepted")); + assert!( + err.contains("blob at idx"), + "{reader}: expected blob verify error, got: {err}" + ); + } + + let tmp = std::env::temp_dir().join("ix_env_blob_tamper_mmap_test.ixe"); + { + let mut f = std::fs::File::create(&tmp).unwrap(); + f.write_all(&buf).unwrap(); + } + let err = Env::get_anon_mmap(&tmp) + .expect_err("get_anon_mmap: tampered blob accepted"); + assert!(err.contains("blob at idx"), "got: {err}"); + std::fs::remove_file(&tmp).ok(); + } + + #[test] + fn env_get_rejects_trailing_garbage_but_get_anon_stops_early() { + let env = Env::new(); + store_canonical(&env, defn_const(vec![])); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + buf.extend_from_slice(&[0xAB, 0xCD, 0xEF]); + let err = Env::get(&mut buf.as_slice()) + .expect_err("trailing bytes must be rejected by the full reader"); + assert!(err.contains("trailing"), "got: {err}"); + // get_anon stops after §3 and never sees the tail — by design. + Env::get_anon(&mut buf.as_slice()) + .expect("get_anon early-stops before the garbage"); + } + + /// §3 is the canonical hint channel: with an empty `anon_hints` map + /// the writer derives the section from Named `Def` metadata, and an + /// env carrying the same hints explicitly serializes byte-identically. + #[test] + fn env_hints_derived_from_named_metadata() { + use crate::env::Named; + use crate::metadata::{ConstantMetaInfo, ExprMeta}; + + let env = Env::new(); + let const_addr = store_canonical(&env, defn_const(vec![])); + let name = Name::str(Name::anon(), "hinted".to_string()); + let name_addr = Address::from_blake3_hash(*name.get_hash()); + env.names.insert(name_addr.clone(), name.clone()); + let meta = ConstantMeta::new(ConstantMetaInfo::Def { + name: name_addr, + lvls: vec![], + hints: ReducibilityHints::Regular(7), + all: vec![], + ctx: vec![], + arena: ExprMeta::default(), + type_root: 0, + value_root: 0, + }); + env.named.insert( + name, + Named { addr: const_addr.clone(), meta, original: None }, + ); + + // Empty map → §3 derived from Named at write time; the anon + // reader picks it up without ever parsing the Named section. + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let anon = Env::get_anon(&mut buf.as_slice()).unwrap(); + assert_eq!( + anon.anon_hints.get(&const_addr), + Some(&ReducibilityHints::Regular(7)) + ); + + // Explicit map with identical content → identical bytes. + let mut env2 = env.clone(); + env2.anon_hints.insert(const_addr, ReducibilityHints::Regular(7)); + let mut buf2 = Vec::new(); + env2.put(&mut buf2).unwrap(); + assert_eq!( + buf, buf2, + "derived and explicit hint sections should serialize identically" + ); + } } diff --git a/crates/ixvm-codegen/src/env_handle.rs b/crates/ixvm-codegen/src/env_handle.rs index d92c546c..1e1f6e3d 100644 --- a/crates/ixvm-codegen/src/env_handle.rs +++ b/crates/ixvm-codegen/src/env_handle.rs @@ -6,12 +6,12 @@ //! per-shard FFI calls take a `&EnvHandle` (shared across the batch) so //! the env is parsed exactly once instead of every call. //! -//! `anon_hints` (`Defn` reducibility hints) are populated at handle -//! construction time. `Env::get_anon_mmap` already harvests them; -//! `Env::get` (full-form decode used by the bytes-blob path) does -//! not, so `from_bytes` runs the harvest post-decode. +//! `anon_hints` (`Defn` reducibility hints) come from the `.ixe` §3 +//! hints section, which every writer emits (deriving it from Named +//! metadata when the in-memory map is empty) — both readers used here +//! populate the map directly, so no post-decode harvest is needed. -use ixon::{Env, metadata::ConstantMetaInfo}; +use ixon::Env; pub struct EnvHandle { pub env: Env, @@ -19,38 +19,17 @@ pub struct EnvHandle { impl EnvHandle { /// Load via `Env::get_anon_mmap` (zero-copy mmap of the `.ixe` file). - /// Anon-mode parser already harvests `anon_hints` — no post-pass. pub fn from_ixe_path(path: &std::path::Path) -> Result { let env = Env::get_anon_mmap(path)?; Ok(Self { env }) } /// Decode a serialized env blob (`Ixon.serEnv` output) via - /// `Env::get`, then harvest `anon_hints` from each `Def` named - /// entry. Used by the compiled-Lean-env path where the env is built - /// in Lean memory and serialized for the cross-FFI handoff. + /// `Env::get`. Used by the compiled-Lean-env path where the env is + /// built in Lean memory and serialized for the cross-FFI handoff. pub fn from_bytes(bytes: &[u8]) -> Result { let mut cursor: &[u8] = bytes; - let mut env = Env::get(&mut cursor)?; - // `Env::get` reads named entries but doesn't populate - // `env.anon_hints`. Walk `env.named` and insert each `Def` - // variant's `hints`. Mirrors the in-line harvest inside - // `Env::get_anon`. - let hints: Vec<_> = env - .named - .iter() - .filter_map(|entry| { - let named = entry.value(); - if let ConstantMetaInfo::Def { hints, .. } = &named.meta.info { - Some((named.addr.clone(), *hints)) - } else { - None - } - }) - .collect(); - for (addr, h) in hints { - env.anon_hints.insert(addr, h); - } + let env = Env::get(&mut cursor)?; Ok(Self { env }) } } diff --git a/crates/kernel/src/anon_work.rs b/crates/kernel/src/anon_work.rs index be0aae24..b705da12 100644 --- a/crates/kernel/src/anon_work.rs +++ b/crates/kernel/src/anon_work.rs @@ -193,70 +193,11 @@ pub fn closure_addrs( source: &IxonEnv, roots: &[Address], ) -> std::collections::HashSet
{ - use std::collections::{HashSet, VecDeque}; - - use ixon::constant::{ConstantInfo as CI, MutConst as MC}; - - let proj_block = |info: &CI| -> Option
{ - match info { - CI::IPrj(p) => Some(p.block.clone()), - CI::CPrj(p) => Some(p.block.clone()), - CI::RPrj(p) => Some(p.block.clone()), - CI::DPrj(p) => Some(p.block.clone()), - _ => None, - } - }; - - let mut closure: HashSet
= HashSet::default(); - let mut queue: VecDeque
= VecDeque::new(); - let push = - |closure: &mut HashSet
, q: &mut VecDeque
, a: Address| { - if closure.insert(a.clone()) { - q.push_back(a); - } - }; - for r in roots { - push(&mut closure, &mut queue, r.clone()); - } - while let Some(addr) = queue.pop_front() { - if let Some(c) = source.get_const(&addr) { - // 1. Expr-level refs. - for r in &c.refs { - push(&mut closure, &mut queue, r.clone()); - } - // 2. A projection → its Muts block (structural, not in `refs`). - if let Some(b) = proj_block(&c.info) { - push(&mut closure, &mut queue, b); - } - // 3. A Muts block → ALL its member + constructor projection entries. - // Ingressing a block (`ingress_anon_block`) computes these projection - // addresses and requires them present, even when nothing references them - // directly via `refs`. Mirror `build_anon_work`'s enumeration so the - // sub-env carries them (else the guest fails with "computed CPrj address - // … not present in env"). - if let CI::Muts(members) = &c.info { - for (i, m) in members.iter().enumerate() { - let i = i as u64; - let member_addr = match m { - MC::Defn(_) => anon_defn_proj_addr(&addr, i), - MC::Indc(_) => anon_indc_proj_addr(&addr, i), - MC::Recr(_) => anon_recr_proj_addr(&addr, i), - }; - push(&mut closure, &mut queue, member_addr); - if let MC::Indc(ind) = m { - for cidx in 0..ind.ctors.len() as u64 { - push( - &mut closure, - &mut queue, - anon_ctor_proj_addr(&addr, i, cidx), - ); - } - } - } - } - } - } - closure + // The traversal lives in `ixon` (`Env::bfs_closure`) so bundle + // pruning/validation and the shard sub-env share one edge + // definition; this wrapper keeps the kernel-side std-HashSet + // signature. + source.bfs_closure(roots).into_iter().collect() } /// Build a closure sub-env: serialize only the BFS dependency closure of @@ -265,9 +206,9 @@ pub fn closure_addrs( /// per-const integrity check (`hash(bytes) == addr`) and the env merkle root /// still hold. The guest decodes this instead of the whole env, so it pays only /// its closure's decode — essential for envs that don't fit the guest whole -/// (Init, 184 MB doesn't fit the 512 MB Zisk guest). Empty `anon_hints` (no -/// metadata section) is performance-only: ingress falls back to `Regular(0)`, -/// so the typecheck result — and thus the committed claim — is unchanged. +/// (Init, 184 MB doesn't fit the 512 MB Zisk guest). Missing hints are +/// performance-only: ingress falls back to `Regular(0)`, so the typecheck +/// result — and thus the committed claim — is unchanged. /// External refs (not in `source`) are omitted and remain open assumptions, /// exactly as in whole-env. /// @@ -289,9 +230,10 @@ pub fn build_sub_env( } // else: external ref absent from `source` — omit; stays an open assumption. // Carry the constant's reducibility hint so the guest reproduces vanilla - // kernel behavior. `get_anon` normally harvests hints from the Named - // section, which this sub-env drops; without them the kernel forces - // `Regular(0)` and does extra def-eq work (the ~30% check overhead). + // kernel behavior. The sub-env has no Named section to derive the §3 + // hints from at serialization time, so populate the map explicitly; + // without hints the kernel forces `Regular(0)` and does extra def-eq + // work (the ~30% check overhead). if let Some(h) = source.anon_hints.get(addr) { sub.anon_hints.insert(addr.clone(), *h); } diff --git a/crates/kernel/src/ingress.rs b/crates/kernel/src/ingress.rs index 5b17aa01..1db92b19 100644 --- a/crates/kernel/src/ingress.rs +++ b/crates/kernel/src/ingress.rs @@ -3682,9 +3682,19 @@ fn drop_ixon_env(_ixon_env: IxonEnv, _quiet: bool) { fn drop_ixon_env(ixon_env: IxonEnv, quiet: bool) { let total_start = Instant::now(); // `anon_hints` is a small FxHashMap (one entry per Def from the .ixe's - // Named metadata); dropping it inline alongside the bookkeeping below - // is negligible compared to the DashMap dropdance. - let IxonEnv { consts, named, blobs, names, comms, anon_hints: _ } = ixon_env; + // hints section); `main`/`assumptions` are a single address and a small + // set. Dropping them inline alongside the bookkeeping below is + // negligible compared to the DashMap dropdance. + let IxonEnv { + consts, + named, + blobs, + names, + comms, + anon_hints: _, + main: _, + assumptions: _, + } = ixon_env; let consts_len = consts.len(); let named_len = named.len(); let names_len = names.len(); diff --git a/docs/Ixon.md b/docs/Ixon.md index f8a2798e..70418060 100644 --- a/docs/Ixon.md +++ b/docs/Ixon.md @@ -802,15 +802,22 @@ plus `.ixe`. See `src/ix/ixon/serialize.rs::Env::put` for the byte-level layout. The .ixe layout is a Tag4(0xE, 0) header byte followed by a 32-byte -canonical merkle root and then 5 sections: +canonical merkle root, the bundle header fields, and then 6 sections +(hot data first, metadata last): ``` -Header: Tag4 { flag: 0xE, size: 0 } -- one byte (0xE0) -Root: 32 bytes -- canonical merkle root over - consts.keys(); for empty - const sets this is the - fixed `zero_address` - sentinel. +Header: Tag4 { flag: 0xE, size: 0 } -- one byte (0xE0) +Root: 32 bytes -- canonical merkle root over + consts.keys(); for empty + const sets this is the + fixed `zero_address` + sentinel. +Main: 1 byte (0x00 | 0x01) + 32 bytes if 0x01 + -- optional bundle root; see + "Bundles" below. +Assumptions: count (Tag0) + [Address (32 bytes)]* + -- strictly ascending; the + bundle trust boundary. ``` The root is mandatory (non-optional): every env has a unique canonical @@ -819,12 +826,23 @@ produce byte-identical roots regardless of construction order. Deserialization recomputes the root from `consts` and rejects any mismatch as tampered. +`main` and `assumptions` are NOT covered by the consts merkle root — +`main` is a convenience pointer (readers verify `main ∈ consts`; +consumers holding an externally-expected address must compare), and +the assumptions root, when a claim needs it, is recomputed as +`merkle_root_canonical(leaves)` from the section (identical to the +root `AssumptionTree::canonical` produces over the same leaves). + **Section 1: Blobs** (Address → raw bytes) ``` count (Tag0) [Address (32 bytes) + len (Tag0) + bytes]* ``` +Every reader verifies `blake3(bytes) == addr` per blob entry — a +swapped blob would otherwise silently change a Nat/String literal's +value under an otherwise-valid file. + **Section 2: Constants** (Address → length-prefixed Constant bytes) ``` count (Tag0) @@ -833,42 +851,85 @@ count (Tag0) The Tag0 length sidecar is a **section-level** framing byte: it is not part of the constant's content-addressed bytes. The address is -computed as `blake3` over only the Tag4 constant body. This layout -lets a lazy loader slice each constant directly into a -[`LazyConstant`](../src/ix/ixon/lazy.rs) without parsing its Tag4 -envelope, deferring full deserialization until first access. The -materialized `Constant` is cached so subsequent accesses are free. - -### Anonymous-only loading +computed as `blake3` over only the Tag4 constant body (verified per +entry on load). This layout lets a lazy loader slice each constant +directly into a [`LazyConstant`](../crates/ixon/src/lazy.rs) without +parsing its Tag4 envelope, deferring full deserialization until first +access. -`Env::get_anon` (`src/ix/ixon/serialize.rs`) is a sibling of -`Env::get` that loads only the anonymous sections — header, blobs, -consts — and parses-and-drops the metadata sections (names, named, -comms). The returned `Env` has empty `named`/`names`/`comms` and is -suitable for anon-mode kernel workflows that never consult metadata. -Steady-state memory for a Mathlib-scale env drops from ~3-4 GB -(structured + metadata) to ~1 GB (lazy bytes only). +**Section 3: Reducibility hints** (Address → ReducibilityHints) +``` +count (Tag0) +[Address (32 bytes) + ReducibilityHints]* +``` -Exposed to Lean via `rs_de_env_anon` (`Ix.Ixon.rsDeEnvAnon`). +The canonical hint channel for the anon/lazy readers, keyed by +constant address and sorted ascending. Writers ALWAYS emit this +section: when the in-memory hint map is empty (the compile path — +hints live in Named metadata), the section is derived from the Named +entries' `Def` metadata at write time. Hints are performance-only +advice (the `Regular(0)` fallback is always correct) and are +intentionally outside the consts merkle root. -**Section 3: Names** (Address → NameComponent, topologically sorted) +**Section 4: Names** (Address → NameComponent, topologically sorted) ``` count (Tag0) [Address (32 bytes) + NameComponent]* ``` -**Section 4: Named** (Name Address → Named with indexed metadata) +**Section 5: Named** (Name Address → Named with indexed metadata) ``` count (Tag0) [NameAddress (32 bytes) + ConstAddress (32 bytes) + ConstantMeta]* ``` -**Section 5: Commitments** (Address → Comm) +**Section 6: Commitments** (Address → Comm) ``` count (Tag0) [Address (32 bytes) + secret_addr (32 bytes) + payload_addr (32 bytes)]* ``` +The full reader (`Env::get` / Lean `getEnv`) rejects trailing bytes +after Section 6. + +### Anonymous-only loading + +`Env::get_anon` (`crates/ixon/src/serialize.rs`) is a sibling of +`Env::get` that reads the header, §1 blobs, §2 consts, and §3 hints, +then STOPS — the metadata sections are laid out after the hints +precisely so anon readers never touch them. The returned `Env` has +empty `named`/`names`/`comms` and is suitable for anon-mode kernel +workflows that never consult metadata. Steady-state memory for a +Mathlib-scale env drops from ~3-4 GB (structured + metadata) to ~1 GB +(lazy bytes only). + +Exposed to Lean via `rs_de_env_anon` (`Ix.Ixon.rsDeEnvAnon`); +`Env::get_anon_mmap` is the zero-copy mmap sibling and +`Env::parse_lazy_index` the zero-copy index variant (reads through §5, +skipping only comms). + +### Bundles: pinning a single value + +A **bundle** is an `.ixe` whose `main` points at a distinguished +constant (e.g. an anonymous `Defn` wrapping a value, produced by +`Ix.Commit.compileDef`) and whose contents are closed up to +`assumptions`. Because a constant's address is a merkle root over its +entire dependency DAG (refs tables hold addresses of constants and +literal blobs, recursively), `main`'s 32 bytes alone pin the value; +the bundle is the data-availability artifact that ships the bytes +behind those addresses. + +- `Env::prune_to_closure(main, assumed)` builds a bundle: the 3-edge + closure of `main` (Expr refs; projection → `Muts` block; `Muts` + block → member/constructor projections), cut at `assumed`, carrying + constants (genuine bytes), blobs, per-constant hints, and display + metadata (named entries, name components + string blobs, `DataValue` + payload blobs, `meta_refs` extension edges, aux_gen originals). +- `Env::validate_closed()` is the receiver-side check: `main ∈ consts` + and every reachable address is carried (consts ∪ blobs) or assumed. +- Whole-environment files are the degenerate case (`main` absent, + `assumptions` empty). + --- ## Proofs and Claims @@ -881,7 +942,7 @@ fits in single-byte tags (sizes 0..=7 per flag). | Size | Byte | Type | Payload | |------|------|------|---------| -| 0 | `0xE0` | Environment | bare 32-byte merkle root + 5 sections | +| 0 | `0xE0` | Environment | 32-byte merkle root + main/assumptions + 6 sections | | 1 | `0xE1` | Commitment | 2 addr: secret, payload | | 2 | `0xE2` | AssumptionTree | recursive merkle-tree body (see below) | | 3 | `0xE3` | Eval claim | 2 addr (input, output) + opt assumptions |