From d7d25c95fffbfecdf214c86ff7bda68bedd25070 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Sun, 28 Jun 2026 10:31:33 +1000 Subject: [PATCH 01/15] feat(solvers): custom geometric-MG prolongation hook (scalar) Add set_custom_mg() + utilities/custom_mg.py: drive PCMG geometric multigrid with a prolongation we build ourselves (barycentric or RBF) from independent, possibly non-nested coarse meshes. Coarse operators come from Galerkin RAP (P^T A P), so only the prolongation list is supplied. This decouples geometric multigrid from a nested refine() hierarchy. Mechanism: inject the custom P before the first PCSetUp (avoids the MatProductReplaceMats shape error a live matrix swap triggers under Galerkin), and KSPSetDMActive(OPERATOR, False) so PETSc uses our explicit P instead of re-deriving DM-hierarchy interpolation. Finest level reduced to the BC-eliminated global ordering; coarse levels use full DOF coords. Validated (scalar Poisson, refinement=2 box): custom barycentric P from independent coarse meshes reaches FMG iteration counts (1-3 vs FMG 2) and the same solution. On a mesh with NO refine hierarchy (FMG unavailable -> GAMG, 13 iters) custom-P geometric MG converges in ~9 iters. test_1015 (4 tests); test_1014 (11) still pass. Single-field (scalar/vector) only; Stokes velocity-block path is a follow-up. Underworld development team with AI support from Claude Code --- .../cython/petsc_generic_snes_solvers.pyx | 46 ++++ src/underworld3/utilities/custom_mg.py | 208 ++++++++++++++++++ tests/test_1015_custom_mg_prolongation.py | 77 +++++++ 3 files changed, 331 insertions(+) create mode 100644 src/underworld3/utilities/custom_mg.py create mode 100644 tests/test_1015_custom_mg_prolongation.py diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index 296e162e..35c57ae3 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -93,6 +93,45 @@ class SolverBaseClass(uw_object): # smoother / coarse-solver options with the framework FMG bundle. self._pc_user_override = False + # Custom multigrid prolongation hierarchy (see set_custom_mg / + # utilities.custom_mg). None => standard FMG/GAMG path, unchanged. + self._custom_mg = None + + def set_custom_mg(self, coarse_meshes, kind="barycentric", verbose=False): + r"""Drive geometric multigrid with a prolongation we build ourselves. + + Supplies a sequence of (possibly **non-nested**) coarse meshes from which + a barycentric or RBF prolongation ``P`` is assembled and installed into + the PCMG via ``PC.setMGInterpolation``; coarse operators are formed by + Galerkin RAP. This decouples geometric multigrid from a nested + ``refine()`` hierarchy — it works even when the solver mesh has no + refinement hierarchy at all. + + Parameters + ---------- + coarse_meshes : list of Mesh + Coarsest-first list of coarse meshes (the finest level is the + solver's own mesh). Need not be nested with the solver mesh. + kind : {"barycentric", "rbf"} + Prolongation builder. ``barycentric`` is FE-exact; ``rbf`` is a + polyharmonic RBF (Shepard-normalised). Default ``barycentric``. + verbose : bool + Print the per-level DOF counts at injection. + + Notes + ----- + Single-field (scalar/vector) solvers only for now; the Stokes + velocity-block path is a separate increment. See + :mod:`underworld3.utilities.custom_mg`. + """ + if kind not in ("barycentric", "rbf"): + raise ValueError("kind must be 'barycentric' or 'rbf'") + if not coarse_meshes: + raise ValueError("coarse_meshes must be a non-empty coarsest-first list") + self._custom_mg = {"coarse_meshes": list(coarse_meshes), + "kind": kind, "verbose": verbose} + self.is_setup = False + def add_update_callback(self, callback): r"""Register a callback fired at the start of every nonlinear (SNES) iteration. @@ -2662,6 +2701,13 @@ class SNES_Scalar(SolverBaseClass): # ``constant_nullspace`` was set. self._attach_constant_nullspace() + # Custom multigrid prolongation: inject our P hierarchy before the + # first PCSetUp (so the Galerkin coarse operators are built from it). + # No-op unless set_custom_mg() was called. + if self._custom_mg is not None: + from underworld3.utilities.custom_mg import inject_custom_mg + inject_custom_mg(self) + # solve self._snes_solve_with_retries(gvec, divergence_retries, verbose) diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py new file mode 100644 index 00000000..9ffb5c62 --- /dev/null +++ b/src/underworld3/utilities/custom_mg.py @@ -0,0 +1,208 @@ +""" +Custom geometric-multigrid prolongation injection. + +Build the multigrid prolongation ``P`` **ourselves** (barycentric or RBF) from a +sequence of independent — possibly *non-nested* — coarse meshes, and install it +into a solver's PETSc ``PCMG`` via ``PC.setMGInterpolation``. The coarse +operators are then formed by Galerkin RAP (``Pᵀ A P``), exactly as UW3's FMG +already does. + +Why +--- +UW3's geometric FMG requires a nested ``refine()`` hierarchy with PETSc's exact +nested interpolation. That couples multigrid to uniform refinement and rules out +local adaptation / a dynamic node budget. PETSc's ``PCMG`` does **not** require +its transfer operator to come from the DM hierarchy, however: any prolongation +supplied with ``setMGInterpolation`` is used, and ``pc_mg_galerkin=both`` builds +consistent coarse operators from it. The prolongation "evaluate the coarse FE +function at the fine DOF coordinates" is exactly what the nested path computes — +so we can build it ourselves by barycentric (FE-exact) or RBF interpolation for +*any* coarse/fine pair. This decouples *how the mesh refines* from *what transfer +multigrid uses*. + +Validated (scalar Poisson, refinement=2 box): custom barycentric ``P`` from +independent coarse meshes reaches FMG iteration counts (1–3 vs FMG 2). On a mesh +with **no** refine hierarchy (FMG unavailable → GAMG, 13 iters) custom-P +geometric MG converges in ~9 iters. + +Notes +----- +* The finest level is the solver's real (BC-eliminated) operator space; its + ``P`` rows are reduced to the global/MG ordering. Coarse levels use the full + DOF coordinates of each coarse mesh (no BC reduction needed — Galerkin RAP and + the finest reduction keep the coarse correction consistent). +* Injection happens at solver build time, *before* the first ``PCSetUp``: the + first Galerkin assembly is built from our ``P`` directly, avoiding PETSc's + ``MatProductReplaceMats`` shape error that a live matrix swap triggers. +* ``KSPSetDMActive(OPERATOR, False)`` stops PETSc re-deriving interpolation / + operators from the DM hierarchy, so our explicit ``P`` is used. +""" + +import numpy as np +from petsc4py import PETSc + +__all__ = ["barycentric_prolongation", "rbf_prolongation", "inject_custom_mg"] + + +# --------------------------------------------------------------------------- # +# Prolongation builders (return scipy CSR matrices) +# --------------------------------------------------------------------------- # +def barycentric_prolongation(coarse_coords, fine_coords): + """FE-exact prolongation: each fine DOF interpolated by the coarse element + that contains it (barycentric weights). Partition of unity (row sums = 1); + fine points outside the coarse hull fall back to the nearest coarse DOF.""" + import scipy.sparse as sp + from scipy.spatial import Delaunay, cKDTree + + tri = Delaunay(coarse_coords) + simp = tri.find_simplex(fine_coords) + tree = cKDTree(coarse_coords) + dim = coarse_coords.shape[1] + rows, cols, vals = [], [], [] + for i in range(fine_coords.shape[0]): + s = simp[i] + if s < 0: # outside coarse hull → nearest coarse DOF + rows.append(i); cols.append(int(tree.query(fine_coords[i])[1])); vals.append(1.0) + continue + verts = tri.simplices[s] + T = tri.transform[s] + bary = T[:dim].dot(fine_coords[i] - T[dim]) + w = np.append(bary, 1.0 - bary.sum()) + for k in range(dim + 1): + rows.append(i); cols.append(int(verts[k])); vals.append(float(w[k])) + return sp.csr_matrix((vals, (rows, cols)), + shape=(fine_coords.shape[0], coarse_coords.shape[0])) + + +def rbf_prolongation(coarse_coords, fine_coords, smooth=0.0): + """RBF prolongation: polyharmonic (r² log r) kernel + affine polynomial tail + (reproduces linear fields), Shepard row-normalised to a partition of unity. + Works for arbitrary (non-nested) point sets; software-equivalent to the + barycentric builder as an MG transfer operator.""" + import scipy.sparse as sp + from scipy.spatial.distance import cdist + + def phi(r): + r = np.where(r == 0.0, 1e-30, r) + return r ** 2 * np.log(r) + + nc, dim = coarse_coords.shape + Pc = np.hstack([np.ones((nc, 1)), coarse_coords]) # affine tail + Acc = phi(cdist(coarse_coords, coarse_coords)) + smooth * np.eye(nc) + M = np.block([[Acc, Pc], [Pc.T, np.zeros((dim + 1, dim + 1))]]) + Minv = np.linalg.inv(M) + B = np.hstack([phi(cdist(fine_coords, coarse_coords)), + np.ones((fine_coords.shape[0], 1)), fine_coords]) + Praw = (B @ Minv)[:, :nc] + rs = Praw.sum(axis=1, keepdims=True) + rs[np.abs(rs) < 1e-12] = 1.0 + return sp.csr_matrix(Praw / rs) + + +_BUILDERS = {"barycentric": barycentric_prolongation, "rbf": rbf_prolongation} + + +# --------------------------------------------------------------------------- # +# Coordinate helpers +# --------------------------------------------------------------------------- # +def _to_petsc_aij(csr): + csr = csr.tocsr() + M = PETSc.Mat().createAIJ( + size=csr.shape, + csr=(csr.indptr.astype("int32"), csr.indices.astype("int32"), csr.data), + ) + M.assemble() + return M + + +def _reduce_to_global(dm, full_coords): + """Reduce full local DOF coordinates to the solver's global (BC-eliminated, + MG-ordered) layout by scattering each component through ``localToGlobal`` + (which drops constrained boundary DOFs and reorders to global indices).""" + lvec = dm.getLocalVec() + gvec = dm.getGlobalVec() + cdim = full_coords.shape[1] + if lvec.getLocalSize() != full_coords.shape[0]: + dm.restoreLocalVec(lvec); dm.restoreGlobalVec(gvec) + raise RuntimeError( + f"custom_mg: local DOF count {lvec.getLocalSize()} != coord count " + f"{full_coords.shape[0]} — degree/continuity mismatch") + out = np.zeros((gvec.getSize(), cdim)) + for c in range(cdim): + lvec.array[:] = full_coords[:, c] + dm.localToGlobal(lvec, gvec, addv=False) + out[:, c] = gvec.array + dm.restoreLocalVec(lvec) + dm.restoreGlobalVec(gvec) + return out + + +# --------------------------------------------------------------------------- # +# Injection +# --------------------------------------------------------------------------- # +def inject_custom_mg(solver): + """Configure ``solver``'s PCMG to use our custom prolongation hierarchy. + + Called from the solver's ``solve()`` (after ``_build``, before the SNES + solve) when ``solver._custom_mg`` is set via ``set_custom_mg``. Scalar / + single-field solvers only for now (the Stokes velocity-block path is a + separate increment). + """ + cfg = solver._custom_mg + coarse_meshes = cfg["coarse_meshes"] + kind = cfg["kind"] + builder = _BUILDERS[kind] + + var = solver.Unknowns.u + degree = var.degree + continuous = getattr(var, "continuous", True) + + if solver._pc_option_prefix not in ("", None): + raise NotImplementedError( + "custom_mg currently supports single-field (scalar/vector) solvers; " + f"solver uses PC prefix '{solver._pc_option_prefix}' (e.g. Stokes " + "velocity block) which is a separate increment.") + + # --- per-level DOF coordinates --------------------------------------- # + fine = _reduce_to_global(solver.dm, + solver.mesh._get_coords_for_basis(degree, continuous)) + levels = [m._get_coords_for_basis(degree, continuous) for m in coarse_meshes] + levels.append(fine) + nlev = len(levels) + if nlev < 2: + raise ValueError("custom_mg needs at least one coarse mesh") + + # --- build prolongations P[l]: level l-1 -> level l ------------------ # + Ps = [None] + [_to_petsc_aij(builder(levels[l - 1], levels[l])) + for l in range(1, nlev)] + + # --- configure the geometric MG bundle on the managed block ---------- # + opts = solver.petsc_options + pfx = solver._pc_option_prefix or "" + opts[pfx + "pc_type"] = "mg" + opts[pfx + "pc_mg_type"] = "full" + opts[pfx + "pc_mg_galerkin"] = "both" # coarse operators = PᵀAP + opts[pfx + "mg_levels_ksp_type"] = "richardson" + opts[pfx + "mg_levels_pc_type"] = "sor" + opts[pfx + "mg_coarse_pc_type"] = "redundant" + opts[pfx + "mg_coarse_redundant_pc_type"] = "lu" + for key in ("pc_gamg_type", "pc_gamg_repartition", "pc_gamg_agg_nsmooths"): + opts.delValue(pfx + key) + + # --- inject before the first PCSetUp --------------------------------- # + solver.snes.setUp() + ksp = solver.snes.getKSP() + # Stop PETSc re-deriving interpolation/operators from the DM hierarchy. + ksp.setDMActive(PETSc.KSP.DMActive.OPERATOR, False) + pc = ksp.getPC() + pc.setType("mg") + pc.setMGLevels(nlev) + pc.setMGType(PETSc.PC.MGType.FULL) + for l in range(1, nlev): + pc.setMGInterpolation(l, Ps[l]) + pc.setFromOptions() + + if cfg.get("verbose"): + from underworld3 import mpi + mpi.pprint(f"[{solver.name}] custom_mg ({kind}): levels " + f"{[lv.shape[0] for lv in levels]}") diff --git a/tests/test_1015_custom_mg_prolongation.py b/tests/test_1015_custom_mg_prolongation.py new file mode 100644 index 00000000..ea975df1 --- /dev/null +++ b/tests/test_1015_custom_mg_prolongation.py @@ -0,0 +1,77 @@ +""" +Custom multigrid prolongation hook (set_custom_mg / utilities.custom_mg). + +Geometric multigrid driven by a prolongation we build ourselves (barycentric or +RBF) from independent, non-nested coarse meshes — decoupling geometric MG from a +nested refine() hierarchy. See docs/developer/design and the study notes. +""" +import numpy as np +import pytest +import underworld3 as uw + +pytestmark = [pytest.mark.level_2, pytest.mark.tier_b] + + +def _poisson(mesh): + p = uw.systems.Poisson(mesh) + p.constitutive_model = uw.constitutive_models.DiffusionModel + p.constitutive_model.Parameters.diffusivity = 1 + p.f = 0.0 + p.add_dirichlet_bc(0.0, "Bottom") + p.add_dirichlet_bc(1.0, "Top") + return p + + +def _box(cellSize, refinement=None): + return uw.meshing.UnstructuredSimplexBox( + minCoords=(0.0, 0.0), maxCoords=(1.0, 1.0), + cellSize=cellSize, refinement=refinement, qdegree=2) + + +def test_custom_mg_barycentric_matches_fmg(): + """Custom barycentric P (independent coarse meshes) must converge and agree + with the nested-FMG solution on the same fine problem.""" + fmg = _poisson(_box(0.25, refinement=2)) + fmg.solve() + assert fmg.snes.getConvergedReason() > 0 + + cust = _poisson(_box(0.25, refinement=2)) + cust.set_custom_mg([_box(0.285), _box(0.142)], kind="barycentric") + cust.solve() + assert cust.snes.getKSP().getPC().getType() == "mg" + assert cust.snes.getConvergedReason() > 0 + # same fine problem -> same solution (Galerkin RAP from our P) + rel = (np.linalg.norm(cust.Unknowns.u.data - fmg.Unknowns.u.data) + / (np.linalg.norm(fmg.Unknowns.u.data) + 1e-30)) + assert rel < 1e-4 + # competitive with nested FMG (not pathologically worse) + assert cust.snes.getKSP().getIterationNumber() <= fmg.snes.getKSP().getIterationNumber() + 5 + + +def test_custom_mg_rbf_drives_solver(): + """The RBF builder must also drive a converging geometric MG solve.""" + cust = _poisson(_box(0.25, refinement=2)) + cust.set_custom_mg([_box(0.285), _box(0.142)], kind="rbf") + cust.solve() + assert cust.snes.getKSP().getPC().getType() == "mg" + assert cust.snes.getConvergedReason() > 0 + + +def test_custom_mg_without_refine_hierarchy(): + """Key capability: geometric MG on a mesh with NO refine() hierarchy + (FMG unavailable -> normally GAMG). Custom P supplies the hierarchy.""" + mesh = _box(0.08) # no refinement -> single-level + assert len(mesh.dm_hierarchy) == 1 + cust = _poisson(mesh) + cust.set_custom_mg([_box(0.3), _box(0.16)], kind="barycentric") + cust.solve() + assert cust.snes.getKSP().getPC().getType() == "mg" + assert cust.snes.getConvergedReason() > 0 + + +def test_set_custom_mg_validation(): + p = _poisson(_box(0.5)) + with pytest.raises(ValueError): + p.set_custom_mg([_box(0.8)], kind="not-a-builder") + with pytest.raises(ValueError): + p.set_custom_mg([], kind="barycentric") From 76298f359d7d508a2b968e6fa0ca0cead159e430 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 06:18:39 +1000 Subject: [PATCH 02/15] feat(custom_mg): Layer-1 generalized FMG hierarchy (BC-per-level) + scoped SBR Phase 1 of hardening the custom-prolongation work into a generalized FMG hierarchy. - CustomMGHierarchy: adapter-agnostic multi-level hierarchy that builds custom-P transfers (barycentric/rbf) with essential BCs applied at EVERY level (transfers map reduced->reduced). This is the load-bearing invariant: on an exactly-nested hierarchy, omitting per-level reduction makes a coarse boundary DOF coincide with a BC-removed fine DOF -> zero column -> singular Galerkin coarse operator. build() guards against zero columns. - set_custom_fmg(): register a hierarchy + per-level solver factory; built and installed at solve() time (build-time injection, DMActive(OPERATOR,False), Galerkin RAP). Scalar / single-field-vector (top-level PC); Stokes velocity block is Phase 2. - sbr_refine / sbr_refine_where: local Skeleton-Based Refinement (no MMG, on-rank, conforming) with SCOPED dm_plex_transform_type (leaking it globally breaks UW's uniform refine() with err73). - Legacy finest-only path kept for back-compat (test_1015). Validated: scalar jump-coefficient Poisson on a 5-level (3 uniform + 2 SBR) hierarchy converges in 3 FMG iters vs GAMG 46, solution matches to 2.4e-8. test_1016 (3 tests); test_1015 (4) + test_1014 (11) still pass. Design: docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 120 ++++++++ src/underworld3/utilities/custom_mg.py | 285 +++++++++++++++--- tests/test_1016_custom_mg_hierarchy.py | 60 ++++ 3 files changed, 424 insertions(+), 41 deletions(-) create mode 100644 docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md create mode 100644 tests/test_1016_custom_mg_hierarchy.py diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md new file mode 100644 index 00000000..cb2908cc --- /dev/null +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -0,0 +1,120 @@ +# Generalized FMG hierarchy + adapt-on-top + +Status: design (hardening the custom-prolongation prototype). 2026-06-28. + +## Context + +UW3's geometric FMG (`Mesh(refinement=N)` → `dm_hierarchy`, `preconditioner="fmg"`) +requires a **uniform nested** hierarchy with PETSc's canonical nested interpolation. +That couples multigrid to uniform refinement and rules out local adaptation. + +A prototype established that we can drive geometric multigrid with a prolongation we +**build ourselves** (barycentric or local-RBF) and install via `PC.setMGInterpolation`, +with Galerkin RAP forming the coarse operators. Validated: +- FMG-equivalent convergence on non-nested *and* nested hierarchies (SolCx velocity + block, η-jump 1e6: 8 iters, vs GAMG 157 — geometric MG where FMG cannot otherwise exist); +- a 5-level hierarchy (3 uniform + 2 SBR-targeted at the jump) solves at 8 iters; +- local SBR refinement is a ~4-line stock-petsc4py call (no MMG): mark cells in a label, + `dm_plex_transform_type=refine_sbr`, `dm.adaptLabel("name")` — conforming, on-rank. + +**Load-bearing correctness lesson:** essential BCs must be applied at **every** level, +transfers mapping reduced→reduced. PETSc native FMG does this automatically (each level's +`PetscSection` carries the Dirichlet constraints); custom-P from raw coordinates must +replicate it. Exact nesting makes the omission fatal (coarse boundary-normal DOFs coincide +with BC-removed fine DOFs → zero columns → singular coarse operator). + +## Two-layer architecture + +### Layer 1 — generalized FMG hierarchy (adapter-agnostic) +**The existing `refinement=N` uniform hierarchy STANDS** and is the FMG base. Layer 1 +generalizes the *transfer machinery* so the hierarchy may contain levels that are **not** +uniform refinements, by carrying **supplied prolongations** instead of relying on canonical +nesting. + +Layer 1 knows nothing about *how* a level was produced. Its contract is a sequence of +levels, each `(dm, prolongation_to_next)`, plus: +- **BC-per-level reduction** — transfers map reduced→reduced (the invariant above), derived + from the owning solver's essential BCs via each level DM's global section. +- **Transfer builders** — pluggable: `nested` (PETSc canonical, for uniform levels), + `barycentric` (FE-exact, needs a triangulation), `rbf` (local kNN, scattered-point + robust, reuses the swarm-proxy RBF kernel). Default per level: `nested` where the level + is a genuine uniform refinement, else `barycentric`. +- **Installation** — set the transfers on the velocity-block (Stokes) or top (scalar/vector) + PCMG via `setMGInterpolation`, before first `PCSetUp`, with `KSPSetDMActive(OPERATOR,False)` + and `pc_mg_galerkin=both`. Re-attach nullspaces (pressure / rigid-body) after rebuild. + +Home: `src/underworld3/utilities/custom_mg.py` (grow the committed module). + +Generality requirement: Layer 1's interface must admit **any** future level source +(MMG/parmmg, edge/face-point injection, knockout-as-coarsening, independent meshes) and +**any** transfer builder. No SBR-specific assumptions in Layer 1. + +### Layer 2 — adapt-on-top (first concrete application) +On top of the uniform FMG base, add refined levels via the **existing adaptive-meshing +pattern**: choose an **adapter** + a **metric**, call **periodically**; field +transfer/redistribution handled by the existing remesh machinery +(`remesh_with_field_transfer`, `on_remesh`, `mesh.adapt`). + +This pass's adapter is **SBR refine-on-top**: +- metric → mark cells (e.g. |∇field| threshold, or distance-to-feature) → `adaptLabel` + with `refine_sbr` → a new finest level on top of the current finest; +- **non-load-balancing** (on-rank, no redistribution) — which **bounds the number of + extra levels** (imbalance grows with depth); the adapter exposes/limits this; +- registers `(new level DM, custom-P transfer)` into the Layer-1 hierarchy. + +Home: an adapter consistent with `adaptivity.py` / `mesh.adapt(metric, adapter=...)`, +calling into Layer 1. Future adapters (load-balancing MMG, etc.) plug in the same way. + +## Parallel strategy (parallel from the start) + +The supported parallel path is the **nested, co-partitioned** hierarchy: +- The uniform base is co-partitioned by construction (`dm.refine()` preserves partition); + SBR is on-rank → levels stay co-located (a fine cell's parent coarse cell is on the + same rank). So each rank builds its block of `P` from its **local** coarse cells — + point-location is rank-local; only a thin ghost layer at partition boundaries. +- **BC-per-level reduction is parallel-correct for free**: it rides the DM global section + (`localToGlobal` gives the reduced global ordering across ranks). +- `P` is assembled as an MPIAIJ matrix (fine local rows; coarse local + ghost columns). + +**Non-nested / independent-mesh custom-P** (cross-rank point-location) is **serial-only / +experimental** — it is not on the parallel-supported path. The production case +(targeted refinement = nested) needs only the rank-local path. + +Non-load-balancing SBR → bound the added refinement depth (configurable cap); document the +imbalance/level trade-off. + +## Correctness invariants (must hold, all levels) +1. BCs applied at every level; transfers reduced→reduced (no zero columns). +2. Each prolongation reproduces constants (row-sums = 1 — partition of unity). +3. `pc_mg_galerkin=both` (coarse operators = PᵀAP; UW installs no coarse residual/Jacobian). +4. Nullspaces (constant-pressure / rigid-body) re-attached after any rebuild. +5. No silent fallback: if a level lacks a valid transfer, error (don't degrade to GAMG silently). + +## Test matrix +- Hierarchy build: uniform-only; uniform + N SBR levels; nesting + label survival. +- Per-level reduction: zero-column count == 0 on all transfers (scalar + free-slip vector). +- Transfer: partition-of-unity; barycentric vs rbf; nested-exact vs general. +- Solve: FMG + V-cycle converge on nested and (serial) non-nested; match uniform-FMG iters; + beat GAMG; regression — existing FMG/GAMG paths unchanged. +- Parallel (np>1): co-located transfers, reduced sections, FMG convergence; bounded levels. + +## Phased roadmap +1. **Layer 1 core** — generalized hierarchy + auto BC-per-level reduction + transfer builders + (nested/barycentric/rbf) + install into PCMG; scalar + free-slip vector; serial-validated, + designed parallel-correct (rank-local construction). +2. **Stokes integration** — `set_custom_mg`/hierarchy on the velocity block; nullspace + re-attach; guards; SolCx end-to-end. +3. **Parallel validation** — np>1 tests; co-location/ghost handling; reduced-section + correctness; fix the known rank-local-accumulator pitfalls. +4. **Layer 2 adapter** — SBR refine-on-top following `mesh.adapt` (metric→mark→refine→register), + non-load-balancing, bounded depth, field transfer via existing remesh. +5. **Tests + design doc finalize + tier classification.** + +Later (not this pass; do not block): dynamic per-step reallocation loop; efficiency +(transfer caching, exact-nested combinatorial P, operator reuse); load-balancing adapters; +non-nested parallel transfers; knockout-as-coarsening as an alternative Layer-2 strategy. + +## Explicit non-goals for this pass +- Knockout (shown to pay full-fine assembly; structural value only) — not pursued now. +- Non-nested custom-P in parallel — serial/experimental only. +- Dynamic time-stepping convection integration — later phase. diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index 9ffb5c62..b6fea9c3 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -41,7 +41,8 @@ import numpy as np from petsc4py import PETSc -__all__ = ["barycentric_prolongation", "rbf_prolongation", "inject_custom_mg"] +__all__ = ["barycentric_prolongation", "rbf_prolongation", "inject_custom_mg", + "CustomMGHierarchy", "set_custom_fmg", "sbr_refine", "sbr_refine_where"] # --------------------------------------------------------------------------- # @@ -102,6 +103,57 @@ def phi(r): _BUILDERS = {"barycentric": barycentric_prolongation, "rbf": rbf_prolongation} +# --------------------------------------------------------------------------- # +# Local Skeleton-Based Refinement (no MMG; on-rank; conforming) +# --------------------------------------------------------------------------- # +_DM_ADAPT_REFINE = 1 # PETSc DM_ADAPT_REFINE + + +def _sbr_apply(dm, mark): + """Clone ``dm``, let ``mark(clone, adapt_label)`` flag cells, and SBR-refine. + Conforming (edge bisection + propagation, no hanging nodes), on-rank (no + redistribution), no external mesh libraries. Cell geometry / marking is done + on the CLONE (the un-cloned mesh DM raises err73 from computeCellGeometryFVM). + + IMPORTANT: ``dm_plex_transform_type`` is a *global* PETSc option; leaving it as + ``refine_sbr`` breaks UW3's uniform ``dm.refine()`` (FMG hierarchy build) with + PETSc error 73. It is set only for the duration of the call and restored.""" + opts = PETSc.Options() + had = opts.hasName("dm_plex_transform_type") + prev = opts.getString("dm_plex_transform_type") if had else None + opts.setValue("dm_plex_transform_type", "refine_sbr") + try: + d = dm.clone() + d.createLabel("adapt") + lab = d.getLabel("adapt") + lab.setDefaultValue(0) + mark(d, lab) + return d.adaptLabel("adapt") + finally: + if had: + opts.setValue("dm_plex_transform_type", prev) + else: + opts.delValue("dm_plex_transform_type") + + +def sbr_refine(dm, cells): + """SBR-refine an explicit list of ``cells``.""" + def mark(d, lab): + for c in cells: + lab.setValue(int(c), _DM_ADAPT_REFINE) + return _sbr_apply(dm, mark) + + +def sbr_refine_where(dm, predicate): + """SBR-refine cells whose centroid satisfies ``predicate(centroid)``.""" + def mark(d, lab): + cs, ce = d.getHeightStratum(0) + for c in range(cs, ce): + if predicate(d.computeCellGeometryFVM(c)[1]): + lab.setValue(c, _DM_ADAPT_REFINE) + return _sbr_apply(dm, mark) + + # --------------------------------------------------------------------------- # # Coordinate helpers # --------------------------------------------------------------------------- # @@ -138,50 +190,75 @@ def _reduce_to_global(dm, full_coords): # --------------------------------------------------------------------------- # -# Injection +# Layer 1 — generalized FMG hierarchy (BC-reduced, adapter-agnostic) +# +# INVARIANT (load-bearing): essential BCs are applied at EVERY level, so every +# transfer maps reduced->reduced. PETSc native FMG does this automatically via +# each level's constrained PetscSection; custom-P built from raw coordinates must +# replicate it. Omitting it is fatal on an EXACTLY-nested hierarchy: a coarse +# boundary DOF coincides with a BC-removed fine DOF -> zero column -> singular +# Galerkin coarse operator. (See docs/developer/design/ +# GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md.) # --------------------------------------------------------------------------- # -def inject_custom_mg(solver): - """Configure ``solver``'s PCMG to use our custom prolongation hierarchy. +def _field_subdm(dm, field_id): + """Return the sub-DM for ``field_id`` (whole dm if single-field / None).""" + if field_id is None: + return dm + try: + if dm.getNumFields() <= 1: + return dm + except Exception: + return dm + _iset, sub = dm.createSubDM(field_id) + return sub - Called from the solver's ``solve()`` (after ``_build``, before the SNES - solve) when ``solver._custom_mg`` is set via ``set_custom_mg``. Scalar / - single-field solvers only for now (the Stokes velocity-block path is a - separate increment). - """ - cfg = solver._custom_mg - coarse_meshes = cfg["coarse_meshes"] - kind = cfg["kind"] - builder = _BUILDERS[kind] - var = solver.Unknowns.u - degree = var.degree - continuous = getattr(var, "continuous", True) +def _reduced_map(dm, field_id=None): + """Reduced->full DOF map for a field on a BUILT DM (serial). Index ``k`` of + the returned array is the local DOF that maps to global/MG index ``k`` — + i.e. the BC-eliminated, MG-ordered layout the operator lives on. - if solver._pc_option_prefix not in ("", None): - raise NotImplementedError( - "custom_mg currently supports single-field (scalar/vector) solvers; " - f"solver uses PC prefix '{solver._pc_option_prefix}' (e.g. Stokes " - "velocity block) which is a separate increment.") + NOTE: serial-correct (uses local indices). Parallel correctness — mapping to + *global* numbering across ranks — is a Phase-3 item (the supported parallel + path keeps levels co-partitioned so this stays rank-local).""" + sub = _field_subdm(dm, field_id) + lv = sub.getLocalVec() + n_full = lv.getLocalSize() + lv.array[:] = np.arange(n_full) + gv = sub.getGlobalVec() + sub.localToGlobal(lv, gv, addv=False) + r2f = gv.array.astype(int).copy() + sub.restoreLocalVec(lv) + sub.restoreGlobalVec(gv) + return r2f, n_full - # --- per-level DOF coordinates --------------------------------------- # - fine = _reduce_to_global(solver.dm, - solver.mesh._get_coords_for_basis(degree, continuous)) - levels = [m._get_coords_for_basis(degree, continuous) for m in coarse_meshes] - levels.append(fine) - nlev = len(levels) - if nlev < 2: - raise ValueError("custom_mg needs at least one coarse mesh") - # --- build prolongations P[l]: level l-1 -> level l ------------------ # - Ps = [None] + [_to_petsc_aij(builder(levels[l - 1], levels[l])) - for l in range(1, nlev)] +def _reduced_transfer(coarse_coords, fine_coords, r2f_c, r2f_f, ncomp, builder): + """Build one prolongation reduced(coarse) -> reduced(fine): + node-level scalar P -> interleave ``ncomp`` components -> drop BC rows/cols.""" + import scipy.sparse as sp + Pn = builder(coarse_coords, fine_coords) # (n_f_nodes, n_c_nodes) + Pv = sp.kron(Pn, sp.eye(ncomp), format="csr") # interleaved full vector + Pr = Pv.tocsr()[r2f_f, :][:, r2f_c] # reduced -> reduced + return Pr + - # --- configure the geometric MG bundle on the managed block ---------- # +def _install_transfers(solver, Ps, verbose=False): + """Configure the managed PCMG block to use the supplied prolongations. + Build-time injection (before first PCSetUp): set DMActive(OPERATOR,False) so + PETSc does not re-derive interpolation from the DM, Galerkin RAP for coarse + operators. Scalar / single-field-vector (top-level PC: ``_pc_option_prefix`` + is ``""``); the Stokes velocity-block path is Phase 2.""" + nlev = len(Ps) + 1 opts = solver.petsc_options pfx = solver._pc_option_prefix or "" + if pfx not in ("",): + raise NotImplementedError( + "Layer-1 install supports top-level PC (scalar / single-field vector). " + f"PC prefix '{pfx}' (e.g. Stokes velocity block) is Phase 2.") opts[pfx + "pc_type"] = "mg" opts[pfx + "pc_mg_type"] = "full" - opts[pfx + "pc_mg_galerkin"] = "both" # coarse operators = PᵀAP + opts[pfx + "pc_mg_galerkin"] = "both" opts[pfx + "mg_levels_ksp_type"] = "richardson" opts[pfx + "mg_levels_pc_type"] = "sor" opts[pfx + "mg_coarse_pc_type"] = "redundant" @@ -189,20 +266,146 @@ def inject_custom_mg(solver): for key in ("pc_gamg_type", "pc_gamg_repartition", "pc_gamg_agg_nsmooths"): opts.delValue(pfx + key) - # --- inject before the first PCSetUp --------------------------------- # solver.snes.setUp() ksp = solver.snes.getKSP() - # Stop PETSc re-deriving interpolation/operators from the DM hierarchy. ksp.setDMActive(PETSc.KSP.DMActive.OPERATOR, False) pc = ksp.getPC() pc.setType("mg") pc.setMGLevels(nlev) pc.setMGType(PETSc.PC.MGType.FULL) for l in range(1, nlev): - pc.setMGInterpolation(l, Ps[l]) + pc.setMGInterpolation(l, Ps[l - 1]) pc.setFromOptions() - - if cfg.get("verbose"): + if verbose: from underworld3 import mpi - mpi.pprint(f"[{solver.name}] custom_mg ({kind}): levels " - f"{[lv.shape[0] for lv in levels]}") + mpi.pprint(f"[{solver.name}] custom FMG installed: {nlev} levels, " + f"P sizes {[tuple(P.getSize()) for P in Ps]}") + + +class CustomMGHierarchy: + """A generalized FMG hierarchy: a sequence of level meshes (coarsest..finest) + whose transfers are built by a pluggable builder, with BCs applied at every + level. Adapter-agnostic — it consumes meshes + a way to get each level's + BC-reduced DOF map; it does not know how the levels were produced. + + Parameters + ---------- + level_meshes : list of Mesh + Coarsest-first; the LAST entry must be the solver's own mesh. + builder : {"barycentric", "rbf"} + Per-level node prolongation builder. + field_id : int or None + Field index for multi-field solvers (e.g. 0 = velocity); None = single field. + """ + + def __init__(self, level_meshes, builder="barycentric", field_id=None): + if builder not in _BUILDERS: + raise ValueError("builder must be 'barycentric' or 'rbf'") + if len(level_meshes) < 2: + raise ValueError("need at least 2 levels (>=1 coarse + finest)") + self.level_meshes = list(level_meshes) + self.builder = _BUILDERS[builder] + self.builder_name = builder + self.field_id = field_id + self.transfers = None + + def build(self, solver, level_solver_factory): + """Build the BC-reduced prolongations. ``solver`` is the (built) finest + solver; ``level_solver_factory(mesh) -> built solver`` provides a + same-discretisation solver on each COARSE level so its constrained + section gives that level's reduced map. The finest level uses ``solver``.""" + var = solver.Unknowns.u + degree = var.degree + continuous = getattr(var, "continuous", True) + nlev = len(self.level_meshes) + + coords, r2f, ncomp = [], [], [] + for k, mesh in enumerate(self.level_meshes): + c = np.asarray(mesh._get_coords_for_basis(degree, continuous)) + if k == nlev - 1: + rmap, nfull = _reduced_map(solver.dm, self.field_id) + else: + tmp = level_solver_factory(mesh) + tmp._build(False, False, None) + tmp.snes.setUp() + rmap, nfull = _reduced_map(tmp.dm, self.field_id) + nc = nfull // c.shape[0] + if nfull % c.shape[0] != 0: + raise RuntimeError( + f"level {k}: full DOFs {nfull} not divisible by nodes {c.shape[0]}") + coords.append(c); r2f.append(rmap); ncomp.append(nc) + + if len(set(ncomp)) != 1: + raise RuntimeError(f"inconsistent component counts across levels: {ncomp}") + nc = ncomp[0] + + Ps = [] + for l in range(1, nlev): + Pr = _reduced_transfer(coords[l - 1], coords[l], r2f[l - 1], r2f[l], + nc, self.builder) + zc = int((np.asarray((Pr != 0).sum(axis=0)).ravel() == 0).sum()) + if zc: + raise RuntimeError( + f"transfer {l-1}->{l} has {zc} zero columns (coarse DOFs with no " + f"fine image) — BC-per-level reduction failed; coarse operator " + f"would be singular.") + Ps.append(_to_petsc_aij(Pr)) + self.transfers = Ps + return Ps + + def install(self, solver, verbose=False): + if self.transfers is None: + raise RuntimeError("call build() before install()") + _install_transfers(solver, self.transfers, verbose=verbose) + + +# --------------------------------------------------------------------------- # +# Entry points +# --------------------------------------------------------------------------- # +def set_custom_fmg(solver, coarse_meshes, *, level_solver_factory, + builder="barycentric", field_id=None, verbose=False): + """Generalized custom-P FMG with BC-per-level reduction (the correct path). + + Registers a :class:`CustomMGHierarchy` on the solver so that the next + ``solve()`` builds and installs it (build-time injection). The hierarchy is + ``[*coarse_meshes, solver.mesh]``; ``level_solver_factory(mesh)`` must return + a same-discretisation solver on a coarse level (used only to read its + BC-constrained section).""" + solver._custom_mg = { + "mode": "hierarchy", + "hierarchy": CustomMGHierarchy(list(coarse_meshes) + [solver.mesh], + builder=builder, field_id=field_id), + "factory": level_solver_factory, + "verbose": verbose, + } + solver.is_setup = False + + +def inject_custom_mg(solver): + """Build + install the custom-P FMG. Called from ``solve()`` (after ``_build``, + before the SNES solve) when ``solver._custom_mg`` is set. Dispatches: + - ``mode == "hierarchy"`` -> BC-per-level reduced path (correct, general); + - legacy dict ``{coarse_meshes, kind}`` -> finest-only reduction (kept for + back-compat; valid only when coarse levels are non-nested / unconstrained).""" + cfg = solver._custom_mg + + if isinstance(cfg, dict) and cfg.get("mode") == "hierarchy": + h = cfg["hierarchy"] + h.build(solver, cfg["factory"]) + h.install(solver, verbose=cfg.get("verbose", False)) + return + + # ---- legacy finest-only path (back-compat) ------------------------------ + coarse_meshes = cfg["coarse_meshes"] + builder = _BUILDERS[cfg["kind"]] + var = solver.Unknowns.u + degree = var.degree + continuous = getattr(var, "continuous", True) + if solver._pc_option_prefix not in ("", None): + raise NotImplementedError("legacy custom_mg path is scalar/single-field only") + fine = _reduce_to_global(solver.dm, + solver.mesh._get_coords_for_basis(degree, continuous)) + levels = [m._get_coords_for_basis(degree, continuous) for m in coarse_meshes] + levels.append(fine) + Ps = [_to_petsc_aij(builder(levels[l - 1], levels[l])) for l in range(1, len(levels))] + _install_transfers(solver, Ps, verbose=cfg.get("verbose", False)) diff --git a/tests/test_1016_custom_mg_hierarchy.py b/tests/test_1016_custom_mg_hierarchy.py new file mode 100644 index 00000000..60c873b2 --- /dev/null +++ b/tests/test_1016_custom_mg_hierarchy.py @@ -0,0 +1,60 @@ +"""Layer-1 generalized FMG hierarchy: CustomMGHierarchy + set_custom_fmg with +BC-per-level reduction, over a uniform + SBR-targeted nested hierarchy.""" +import numpy as np, sympy, pytest, underworld3 as uw +from petsc4py import PETSc +from underworld3.utilities import custom_mg + +pytestmark = [pytest.mark.level_2, pytest.mark.tier_b] + +def _sbr(dm, band=0.2): + # scoped SBR (restores dm_plex_transform_type so uniform refine() still works) + return custom_mg.sbr_refine_where(dm, lambda cen: abs(cen[0]-0.5) < band) +def _wrap(dm,m0): + return uw.discretisation.Mesh(dm.clone(), simplex=True, + coordinate_system_type=m0.CoordinateSystem.coordinate_type, qdegree=3, boundaries=m0.boundaries) +def _poisson(mesh): + p=uw.systems.Poisson(mesh); p.constitutive_model=uw.constitutive_models.DiffusionModel + x,y=mesh.X + p.constitutive_model.Parameters.diffusivity=sympy.Piecewise((1.0,x<0.5),(1000.0,True)) + p.f=8*sympy.pi**2*sympy.sin(2*sympy.pi*x)*sympy.sin(2*sympy.pi*y) + for b in ("Bottom","Top","Left","Right"): p.add_dirichlet_bc(0.0,b) + p.petsc_options["ksp_rtol"]=1e-8; p.petsc_options["ksp_type"]="cg" + return p + +def _hierarchy(): + m0=uw.meshing.UnstructuredSimplexBox(minCoords=(0,0),maxCoords=(1,1),cellSize=0.5,regular=True,qdegree=3) + dm0=m0.dm; dm1=dm0.refine(); dm2=_sbr(dm1) # 2 uniform-ish + 1 SBR + return m0, [_wrap(dm0,m0), _wrap(dm1,m0)], _wrap(dm2,m0) + +def test_custom_fmg_hierarchy_converges(): + m0, coarse, fine = _hierarchy() + s=_poisson(fine) + custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_poisson, builder="barycentric") + s.solve() + assert s.snes.getKSP().getPC().getType()=="mg" + assert s.snes.getKSP().getPC().getMGLevels()==3 + assert s.snes.getConvergedReason()>0 + # matches a GAMG solve of the same fine problem + g=_poisson(_wrap(fine.dm,m0)); g.preconditioner="gamg"; g.solve() + rel=np.linalg.norm(s.Unknowns.u.data-g.Unknowns.u.data)/np.linalg.norm(g.Unknowns.u.data) + assert rel < 1e-4 + +def test_bc_per_level_reduction_no_zero_columns(): + """build() must produce transfers with no zero columns (BCs at every level).""" + m0, coarse, fine = _hierarchy() + s=_poisson(fine); s._build(False,False,None); s.snes.setUp() + h=custom_mg.CustomMGHierarchy(coarse+[fine], builder="barycentric") + Ps=h.build(s, _poisson) # raises if any transfer has zero columns + assert len(Ps)==2 + for P in Ps: + Pc=P.getValuesCSR(); import scipy.sparse as sp + M=sp.csr_matrix(Pc[::-1], shape=P.getSize()) + assert int((np.asarray((M!=0).sum(axis=0)).ravel()==0).sum())==0 + +def test_rbf_builder_also_works(): + m0, coarse, fine = _hierarchy() + s=_poisson(fine) + custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_poisson, builder="rbf") + s.solve() + assert s.snes.getKSP().getPC().getType()=="mg" + assert s.snes.getConvergedReason()>0 From 6eb0b340d8e48d6762971c9c49d269b26eab51a8 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 06:36:58 +1000 Subject: [PATCH 03/15] docs(custom_mg): Layer-1 status + remaining hardening before merge Layer 1 (generalized FMG hierarchy) is an independent, working capability (arbitrary nested/non-nested coarse grids -> geometric FMG, BC-per-level). Current scope experimental: scalar/single-field, serial, factory API. Remaining before merge-ready general feature: Stokes velocity-block, drop the factory (label-based BC reduction), parallel. Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index cb2908cc..e0016959 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -114,6 +114,31 @@ Later (not this pass; do not block): dynamic per-step reallocation loop; efficie (transfer caching, exact-nested combinatorial P, operator reuse); load-balancing adapters; non-nested parallel transfers; knockout-as-coarsening as an alternative Layer-2 strategy. +## Status (2026-06-29) + +**Layer 1 is an independent, working capability** — arbitrary coarse grids +(nested *or* non-nested, uniform *or* SBR-refined) → geometric FMG via custom-P +with BC-per-level reduction. It does not depend on Layer 2. + +Landed (committed, tested — `test_1014/1015/1016`, 18 pass): +- `CustomMGHierarchy`, `set_custom_fmg`, `sbr_refine`/`sbr_refine_where`; +- automatic BC-per-level reduction + zero-column guard; +- validated: scalar jump-coeff Poisson, 5-level (3 uniform + 2 SBR), 3 FMG iters + vs GAMG 46. + +**Current scope = experimental:** scalar / single-field-vector (top-level PC), +**serial**, and `set_custom_fmg` takes a `level_solver_factory`. + +Remaining hardening **before** this is a merge-ready general feature (the +relaunch's first task, in order): +1. **Stokes / saddle-point** (velocity-block injection; `_install_transfers` + rejects `fieldsplit_velocity_`). Blocker: velocity sub-PC unreachable pre-solve + → use Galerkin-off + `MatPtAP` coarse ops after first solve. + nullspace re-attach. +2. **Drop the factory** — derive per-level constrained DOFs from boundary labels + + BC components (no throwaway solver) for a clean API. +3. **Parallel (np>1)** — `_reduced_map` is serial; validate co-located rank-local + transfers (fast-follow). + ## Explicit non-goals for this pass - Knockout (shown to pay full-fine assembly; structural value only) — not pursued now. - Non-nested custom-P in parallel — serial/experimental only. From 2dc60e3333e5bd51c9025bab8745c994436cbf79 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:14:18 +1000 Subject: [PATCH 04/15] feat(custom_mg): Stokes velocity-block custom-P FMG (Phase 1, Step 1) Integrate the generalized FMG hierarchy into the saddle-point solver's velocity sub-block. set_custom_fmg(..., field_id=0) now drives geometric multigrid on the velocity block with our supplied (barycentric / RBF) prolongations + Galerkin RAP. The velocity sub-PC is unreachable until the monolithic Jacobian is assembled (PCFieldSplit forms A_vv via MatCreateSubMatrix; snes.setUp builds structure only -> err73). So _install_velocity_block_transfers: forces a Jacobian assembly (computeFunction + computeJacobian at the zero guess; throwaway max_it=0 fallback), reaches the velocity sub-PC, reset()s it and rebuilds a FRESH PCMG from our P (mirrors the proven standalone recipe; sidesteps the MatProductReplaceMats live-swap bug), re-attaches the coupled Stokes nullspace. _configure_pcmg now derives the options prefix from pc.getOptionsPrefix() so both the scalar top-level PC and the velocity sub-PC are configured correctly. inject_custom_mg is wired into SNES_Stokes_SaddlePt.solve (guarded no-op unless set_custom_fmg was called), injected after setFromOptions / nullspace, before the real solve. Validated (SolCx eta_B=1e6, 3-level nested hierarchy, in-solver via solver.solve()): velocity block converges in 6 MG iters vs GAMG 198, solution matches the GAMG reference to 1.5e-9. test_1017 (barycentric + rbf). test_1014/1015/1016 unchanged (20 pass total). Underworld development team with AI support from Claude Code --- .../cython/petsc_generic_snes_solvers.pyx | 14 ++ src/underworld3/utilities/custom_mg.py | 138 ++++++++++++++---- tests/test_1017_custom_mg_stokes.py | 119 +++++++++++++++ 3 files changed, 245 insertions(+), 26 deletions(-) create mode 100644 tests/test_1017_custom_mg_stokes.py diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index 35c57ae3..58f21ab1 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -7458,6 +7458,13 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): self.petsc_options.setValue("snes_max_it", snes_max_it) self.snes.setFromOptions() self._attach_stokes_nullspace() + # Custom geometric-MG prolongation on the velocity block (if registered + # via set_custom_fmg). Injected here — after setFromOptions/nullspace, + # before the real solve — because the velocity sub-PC is only reachable + # once the monolithic Jacobian is assembled (see custom_mg). + if self._custom_mg is not None: + from underworld3.utilities.custom_mg import inject_custom_mg + inject_custom_mg(self) self._snes_solve_with_retries(gvec, divergence_retries, verbose) else: @@ -7468,6 +7475,13 @@ class SNES_Stokes_SaddlePt(SolverBaseClass): self.petsc_options.setValue("snes_max_it", snes_max_it) self.snes.setFromOptions() self._attach_stokes_nullspace() + # Custom geometric-MG prolongation on the velocity block (if registered + # via set_custom_fmg). Injected here — after setFromOptions/nullspace, + # before the real solve — because the velocity sub-PC is only reachable + # once the monolithic Jacobian is assembled (see custom_mg). + if self._custom_mg is not None: + from underworld3.utilities.custom_mg import inject_custom_mg + inject_custom_mg(self) self._snes_solve_with_retries(gvec, divergence_retries, verbose) # Project the rigid-body rotation gauge out of the converged solution. diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index b6fea9c3..187634db 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -243,42 +243,128 @@ def _reduced_transfer(coarse_coords, fine_coords, r2f_c, r2f_f, ncomp, builder): return Pr -def _install_transfers(solver, Ps, verbose=False): - """Configure the managed PCMG block to use the supplied prolongations. - Build-time injection (before first PCSetUp): set DMActive(OPERATOR,False) so - PETSc does not re-derive interpolation from the DM, Galerkin RAP for coarse - operators. Scalar / single-field-vector (top-level PC: ``_pc_option_prefix`` - is ``""``); the Stokes velocity-block path is Phase 2.""" +def _configure_pcmg(pc, Ps): + """Reconfigure ``pc`` as a fresh PCMG (FMG F-cycle) driven by the supplied + reduced->reduced prolongations ``Ps``, Galerkin RAP for coarse operators. + + Writes the MG bundle into the options DB under the PC's OWN options prefix + (``pc.getOptionsPrefix()`` — e.g. ``Solver_N_`` for the scalar top-level PC, + ``Solver_N_fieldsplit_velocity_`` for the Stokes velocity sub-PC) and removes + any gamg keys BEFORE ``setFromOptions`` — otherwise ``setFromOptions`` re-reads + a lingering ``pc_type=gamg`` and reverts ``setType("mg")``. ``setMGInterpolation`` + persists through ``setFromOptions``; the first ``PCSetUp`` builds the coarse + operators from our P (no ``MatProductReplaceMats`` shape bug, since the PCMG is + fresh and P's size is fixed).""" nlev = len(Ps) + 1 - opts = solver.petsc_options - pfx = solver._pc_option_prefix or "" - if pfx not in ("",): - raise NotImplementedError( - "Layer-1 install supports top-level PC (scalar / single-field vector). " - f"PC prefix '{pfx}' (e.g. Stokes velocity block) is Phase 2.") - opts[pfx + "pc_type"] = "mg" - opts[pfx + "pc_mg_type"] = "full" - opts[pfx + "pc_mg_galerkin"] = "both" - opts[pfx + "mg_levels_ksp_type"] = "richardson" - opts[pfx + "mg_levels_pc_type"] = "sor" - opts[pfx + "mg_coarse_pc_type"] = "redundant" - opts[pfx + "mg_coarse_redundant_pc_type"] = "lu" + prefix = pc.getOptionsPrefix() or "" + opts = PETSc.Options() + opts.setValue(prefix + "pc_type", "mg") + opts.setValue(prefix + "pc_mg_type", "full") + opts.setValue(prefix + "pc_mg_galerkin", "both") + opts.setValue(prefix + "mg_levels_ksp_type", "richardson") + opts.setValue(prefix + "mg_levels_pc_type", "sor") + opts.setValue(prefix + "mg_coarse_pc_type", "redundant") + opts.setValue(prefix + "mg_coarse_redundant_pc_type", "lu") for key in ("pc_gamg_type", "pc_gamg_repartition", "pc_gamg_agg_nsmooths"): - opts.delValue(pfx + key) - - solver.snes.setUp() - ksp = solver.snes.getKSP() - ksp.setDMActive(PETSc.KSP.DMActive.OPERATOR, False) - pc = ksp.getPC() + opts.delValue(prefix + key) pc.setType("mg") pc.setMGLevels(nlev) pc.setMGType(PETSc.PC.MGType.FULL) for l in range(1, nlev): pc.setMGInterpolation(l, Ps[l - 1]) pc.setFromOptions() + + +def _install_transfers(solver, Ps, verbose=False): + """Configure the managed PCMG block to use the supplied prolongations. + + Two paths, keyed by ``solver._pc_option_prefix``: + + * **scalar / single-field vector** (top-level PC, prefix ``""``): build-time + injection *before* the first ``PCSetUp`` — the first Galerkin assembly is + built from our P directly. + * **Stokes velocity block** (prefix ``"fieldsplit_velocity_"``): the velocity + sub-PC is unreachable until the monolithic Jacobian is assembled + (``PCFieldSplit`` forms ``A_vv`` via ``MatCreateSubMatrix``; ``snes.setUp`` + builds structure only -> err73). So force a Jacobian assembly, reach the + velocity sub-PC, ``reset`` it and rebuild a fresh PCMG from our P, then + re-attach the coupled Stokes nullspace. + + Either way ``KSPSetDMActive(OPERATOR, False)`` stops PETSc re-deriving the + interpolation from the DM hierarchy.""" + nlev = len(Ps) + 1 + pfx = solver._pc_option_prefix or "" + + if pfx == "": + ksp = solver.snes.getKSP() + solver.snes.setUp() + ksp.setDMActive(PETSc.KSP.DMActive.OPERATOR, False) + _configure_pcmg(ksp.getPC(), Ps) + if verbose: + from underworld3 import mpi + mpi.pprint(f"[{solver.name}] custom FMG installed: {nlev} levels, " + f"P sizes {[tuple(P.getSize()) for P in Ps]}") + return + + if pfx != "fieldsplit_velocity_": + raise NotImplementedError( + f"custom_mg install: unsupported PC prefix '{pfx}'.") + + _install_velocity_block_transfers(solver, Ps, verbose=verbose) + + +def _install_velocity_block_transfers(solver, Ps, verbose=False): + """Stokes velocity-block install (mechanism A: reset + fresh PCMG). + + Preconditions: ``solver._build`` + ``setFromOptions`` + ``_attach_stokes_nullspace`` + have run (the call site in ``SNES_Stokes_SaddlePt.solve`` guarantees this), so + the SNES / DM exist but the Jacobian VALUES are not yet assembled.""" + snes = solver.snes + snes.setUp() + + # 1. force monolithic Jacobian assembly so the fieldsplit can form A_vv + x0 = solver.dm.getGlobalVec() + x0.set(0.0) + J = snes.getJacobian()[0] + Pmat = snes.getJacobian()[1] + try: + f = J.createVecLeft() + snes.computeFunction(x0, f) + snes.computeJacobian(x0, J, Pmat) + except PETSc.Error: + # fallback: throwaway max_it=0 solve assembles + splits the operator + saved = (solver.petsc_options.getString("snes_max_it") + if solver.petsc_options.hasName("snes_max_it") else None) + solver.petsc_options["snes_max_it"] = 0 + snes.setFromOptions() + snes.solve(None, x0) + if saved is not None: + solver.petsc_options["snes_max_it"] = saved + snes.setFromOptions() + solver.dm.restoreGlobalVec(x0) + + # 2. split -> reach the velocity sub-KSP / sub-PC (field 0) + outer_pc = snes.getKSP().getPC() + outer_pc.setUp() + vel_ksp = outer_pc.getFieldSplitSubKSP()[0] + vel_pc = vel_ksp.getPC() + sub = vel_pc.getOptionsPrefix() or "fieldsplit_velocity_" + A_vv, P_vv = vel_pc.getOperators() # capture before reset (reset drops them) + + # 3. fresh PCMG on the velocity sub-block from our Ps + vel_ksp.setDMActive(PETSc.KSP.DMActive.OPERATOR, False) + vel_pc.reset() + vel_pc.setOperators(A_vv, P_vv) + _configure_pcmg(vel_pc, Ps) + vel_pc.setUp() + + # 4. re-attach the coupled Stokes nullspace (operator state was touched) + solver._attach_stokes_nullspace() + if verbose: from underworld3 import mpi - mpi.pprint(f"[{solver.name}] custom FMG installed: {nlev} levels, " + mpi.pprint(f"[{solver.name}] custom FMG installed on velocity block: " + f"{len(Ps) + 1} levels, sub-prefix {sub!r}, " f"P sizes {[tuple(P.getSize()) for P in Ps]}") diff --git a/tests/test_1017_custom_mg_stokes.py b/tests/test_1017_custom_mg_stokes.py new file mode 100644 index 00000000..5abe7b42 --- /dev/null +++ b/tests/test_1017_custom_mg_stokes.py @@ -0,0 +1,119 @@ +"""Layer-1 generalized FMG hierarchy on the STOKES velocity block (Phase-1 Step 1). + +Custom-built prolongations (barycentric / RBF) drive geometric multigrid on the +velocity sub-block of the saddle-point solver, via set_custom_fmg(field_id=0). +Validated on SolCx (eta_B=1e6): the velocity block converges in a handful of MG +iterations (vs ~200 for GAMG) and the solution matches a GAMG reference. + +The hard part (see custom_mg._install_velocity_block_transfers): the velocity +sub-PC is unreachable until the monolithic Jacobian is assembled, so the install +forces a Jacobian assembly, reaches the velocity sub-PC, and rebuilds a fresh PCMG +from our supplied prolongations. +""" +import numpy as np +import pytest +import underworld3 as uw +from underworld3.function import analytic as A +from underworld3.utilities import custom_mg + +pytestmark = [pytest.mark.level_2, pytest.mark.tier_b] + + +def _wrap(dm, m0): + return uw.discretisation.Mesh( + dm.clone(), simplex=True, + coordinate_system_type=m0.CoordinateSystem.coordinate_type, + qdegree=3, boundaries=m0.boundaries) + + +def _hierarchy(): + """cellSize 0.25 base, two uniform refinements -> 3 nested levels.""" + m0 = uw.meshing.UnstructuredSimplexBox( + minCoords=(0, 0), maxCoords=(1, 1), cellSize=0.25, regular=True, qdegree=3) + dm0 = m0.dm + dm1 = dm0.refine() + dm2 = dm1.refine() + meshes = [_wrap(dm0, m0), _wrap(dm1, m0), _wrap(dm2, m0)] + return meshes[:-1], meshes[-1] + + +def _walls(s): + s.add_dirichlet_bc((0.0, None), "Left") + s.add_dirichlet_bc((0.0, None), "Right") + s.add_dirichlet_bc((None, 0.0), "Bottom") + s.add_dirichlet_bc((None, 0.0), "Top") + s.petsc_use_pressure_nullspace = True + s.petsc_options["snes_type"] = "ksponly" + + +def _coarse_factory(mesh): + """Same-discretisation coarse-level Stokes (only its constrained section is + read by CustomMGHierarchy.build to derive the BC-reduced DOF map).""" + s = uw.systems.Stokes(mesh) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = 1.0 + _walls(s) + return s + + +def _solcx(mesh, eta_B=1e6): + sol = A.SolCx(mesh, eta_A=1.0, eta_B=eta_B, x_c=0.5, n=1) + s = uw.systems.Stokes(mesh) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = sol.fn_viscosity + s.saddle_preconditioner = 1.0 / sol.fn_viscosity + s.bodyforce = sol.fn_bodyforce + _walls(s) + s.tolerance = 1e-8 + return s, sol + + +def _vel_ksp(s): + return s.snes.getKSP().getPC().getFieldSplitSubKSP()[0] + + +def test_custom_fmg_velocity_block_barycentric(): + """Custom barycentric P drives geometric MG on the SolCx velocity block, + converging in few iterations and matching a GAMG reference solution.""" + coarse, fine = _hierarchy() + + # GAMG reference + sg, solg = _solcx(fine) + sg.preconditioner = "gamg" + sg.solve() + verr_g = solg.velocity_error(sg.u) + iters_g = _vel_ksp(sg).getIterationNumber() + + # custom-P geometric MG on the velocity block + s, sol = _solcx(fine) + custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_coarse_factory, + builder="barycentric", field_id=0) + s.solve() + vksp = _vel_ksp(s) + + assert s.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert vksp.getPC().getMGLevels() == len(coarse) + 1 + # geometric MG crushes GAMG on the eta-jump velocity block + assert vksp.getIterationNumber() <= 15 + assert vksp.getIterationNumber() < iters_g + # correct solution: matches analytic to GAMG's accuracy, and matches GAMG itself + verr = sol.velocity_error(s.u) + assert verr < 2.0 * verr_g + 1e-6 + rel = np.linalg.norm(s.u.data - sg.u.data) / (np.linalg.norm(sg.u.data) + 1e-30) + assert rel < 1e-4 + + +def test_custom_fmg_velocity_block_rbf(): + """The RBF builder must also drive a converging velocity-block MG solve.""" + coarse, fine = _hierarchy() + s, sol = _solcx(fine) + custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_coarse_factory, + builder="rbf", field_id=0) + s.solve() + vksp = _vel_ksp(s) + assert s.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert vksp.getPC().getMGLevels() == len(coarse) + 1 + assert vksp.getIterationNumber() <= 25 + assert sol.velocity_error(s.u) < 1e-2 From cb77289e52c7347f5dc5a06265116f4584004303 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:14:44 +1000 Subject: [PATCH 05/15] docs(custom_mg): mark Stokes velocity-block integration done (Phase 1, Step 1) Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index e0016959..19bbc72b 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -126,14 +126,26 @@ Landed (committed, tested — `test_1014/1015/1016`, 18 pass): - validated: scalar jump-coeff Poisson, 5-level (3 uniform + 2 SBR), 3 FMG iters vs GAMG 46. -**Current scope = experimental:** scalar / single-field-vector (top-level PC), -**serial**, and `set_custom_fmg` takes a `level_solver_factory`. - -Remaining hardening **before** this is a merge-ready general feature (the -relaunch's first task, in order): -1. **Stokes / saddle-point** (velocity-block injection; `_install_transfers` - rejects `fieldsplit_velocity_`). Blocker: velocity sub-PC unreachable pre-solve - → use Galerkin-off + `MatPtAP` coarse ops after first solve. + nullspace re-attach. +**Current scope = experimental:** **serial**, and `set_custom_fmg` takes a +`level_solver_factory`. Scalar / single-field-vector **and** Stokes velocity-block +are now supported. + +Remaining hardening **before** this is a merge-ready general feature (in order): +1. ~~**Stokes / saddle-point** (velocity-block injection).~~ **DONE** (2026-06-29). + `set_custom_fmg(..., field_id=0)` drives custom-P geometric MG on the velocity + sub-block. The sub-PC is unreachable until the monolithic Jacobian is assembled, + so `_install_velocity_block_transfers` forces a Jacobian assembly + (`computeFunction`+`computeJacobian` at the zero guess; `max_it=0` fallback), + reaches the velocity sub-PC, `reset`s it and rebuilds a **fresh PCMG** from our P + (mechanism A — mirrors the proven standalone recipe and sidesteps the + `MatProductReplaceMats` live-swap bug; the Galerkin-off + `MatPtAP` path remains + the documented fallback), then re-attaches the coupled Stokes nullspace. Wired + into `SNES_Stokes_SaddlePt.solve` as a guarded no-op. Validated on SolCx + (η-jump 1e6, 3-level nested, in-solver): velocity block 6 MG iters vs GAMG 198, + solution matches GAMG to 1.5e-9 (`test_1017`). NOTE: free-slip velocity + rigid-body modes on `A_vv` + coarse ops are **not yet** handled (reusing + `_attach_stokes_nullspace` covers the SolCx pressure-nullspace case only) — a + Phase-2.5 follow-up. 2. **Drop the factory** — derive per-level constrained DOFs from boundary labels + BC components (no throwaway solver) for a clean API. 3. **Parallel (np>1)** — `_reduced_map` is serial; validate co-located rank-local From e0181ba0017f873833af3585eac7d31d8f913a31 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:37:18 +1000 Subject: [PATCH 06/15] refactor(custom_mg): drop level_solver_factory (Phase 1, Step 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each coarse level's BC-constrained reduced map is now derived directly from the coarse mesh DM via _coarse_reduced_map: clone the DM, copy the (built) finest solver's fields + DS onto it, createDS, read the global section. The DS carries UW's exact essential-BC definitions and is a topology-independent discretisation spec, so it constrains the matching boundary DOFs on ANY coarse mesh that shares the solver's boundary labels (nested or not). This removes the throwaway-solver factory (heavy, leak-prone, and an API wart): set_custom_fmg and CustomMGHierarchy.build no longer take a level_solver_factory. Leak-free — DM ops only, no SNES / JIT. Validated identical to the old factory path (reduced maps byte-equal: 126 + 510 DOFs on the SolCx 3-level hierarchy). test_1014/1015/1016/1017 all green (20). custom-P vs native FMG unchanged (6 vs 5 iters, sol match 3.7e-10). Underworld development team with AI support from Claude Code --- src/underworld3/utilities/custom_mg.py | 50 ++++++++++++++++++-------- tests/test_1016_custom_mg_hierarchy.py | 6 ++-- tests/test_1017_custom_mg_stokes.py | 16 ++------- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index 187634db..611638f6 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -233,6 +233,27 @@ def _reduced_map(dm, field_id=None): return r2f, n_full +def _coarse_reduced_map(solver, coarse_mesh, field_id=None): + """A COARSE level's BC-constrained reduced map, with NO throwaway solver. + + Clone the coarse mesh DM and copy the (built) finest ``solver``'s fields + DS + onto it. The DS carries UW's exact essential-BC definitions (the custom + essential-field boundaries), and is a topology-independent discretisation spec, + so ``createDS`` constrains the matching boundary DOFs on ANY coarse mesh that + carries the same boundary labels (nested or not). The resulting global section + gives the same reduced map a full same-discretisation solver would — validated + identical to the old ``level_solver_factory`` path. Leak-free: DM ops only, no + SNES / JIT. + + NOTE: serial, like ``_reduced_map`` (uses local indices); parallel correctness + is a Phase-3 item.""" + cdm = coarse_mesh.dm.clone() + solver.dm.copyFields(cdm) + solver.dm.copyDS(cdm) + cdm.createDS() + return _reduced_map(cdm, field_id) + + def _reduced_transfer(coarse_coords, fine_coords, r2f_c, r2f_f, ncomp, builder): """Build one prolongation reduced(coarse) -> reduced(fine): node-level scalar P -> interleave ``ncomp`` components -> drop BC rows/cols.""" @@ -395,11 +416,12 @@ def __init__(self, level_meshes, builder="barycentric", field_id=None): self.field_id = field_id self.transfers = None - def build(self, solver, level_solver_factory): + def build(self, solver): """Build the BC-reduced prolongations. ``solver`` is the (built) finest - solver; ``level_solver_factory(mesh) -> built solver`` provides a - same-discretisation solver on each COARSE level so its constrained - section gives that level's reduced map. The finest level uses ``solver``.""" + solver. Each COARSE level's BC-constrained reduced map is derived directly + from the coarse mesh DM by copying the finest solver's fields + DS onto it + (``_coarse_reduced_map``) — no throwaway solver. The finest level reads its + map from ``solver.dm``.""" var = solver.Unknowns.u degree = var.degree continuous = getattr(var, "continuous", True) @@ -411,10 +433,7 @@ def build(self, solver, level_solver_factory): if k == nlev - 1: rmap, nfull = _reduced_map(solver.dm, self.field_id) else: - tmp = level_solver_factory(mesh) - tmp._build(False, False, None) - tmp.snes.setUp() - rmap, nfull = _reduced_map(tmp.dm, self.field_id) + rmap, nfull = _coarse_reduced_map(solver, mesh, self.field_id) nc = nfull // c.shape[0] if nfull % c.shape[0] != 0: raise RuntimeError( @@ -448,20 +467,21 @@ def install(self, solver, verbose=False): # --------------------------------------------------------------------------- # # Entry points # --------------------------------------------------------------------------- # -def set_custom_fmg(solver, coarse_meshes, *, level_solver_factory, - builder="barycentric", field_id=None, verbose=False): +def set_custom_fmg(solver, coarse_meshes, *, builder="barycentric", + field_id=None, verbose=False): """Generalized custom-P FMG with BC-per-level reduction (the correct path). Registers a :class:`CustomMGHierarchy` on the solver so that the next ``solve()`` builds and installs it (build-time injection). The hierarchy is - ``[*coarse_meshes, solver.mesh]``; ``level_solver_factory(mesh)`` must return - a same-discretisation solver on a coarse level (used only to read its - BC-constrained section).""" + ``[*coarse_meshes, solver.mesh]``; each coarse level's BC-constrained reduced + map is derived directly from its DM by copying the solver's fields + DS + (``_coarse_reduced_map``), so ``coarse_meshes`` need only carry the same + boundary labels as the solver's mesh. For a saddle-point (Stokes) solver pass + ``field_id=0`` to target the velocity sub-block.""" solver._custom_mg = { "mode": "hierarchy", "hierarchy": CustomMGHierarchy(list(coarse_meshes) + [solver.mesh], builder=builder, field_id=field_id), - "factory": level_solver_factory, "verbose": verbose, } solver.is_setup = False @@ -477,7 +497,7 @@ def inject_custom_mg(solver): if isinstance(cfg, dict) and cfg.get("mode") == "hierarchy": h = cfg["hierarchy"] - h.build(solver, cfg["factory"]) + h.build(solver) h.install(solver, verbose=cfg.get("verbose", False)) return diff --git a/tests/test_1016_custom_mg_hierarchy.py b/tests/test_1016_custom_mg_hierarchy.py index 60c873b2..402ee9c4 100644 --- a/tests/test_1016_custom_mg_hierarchy.py +++ b/tests/test_1016_custom_mg_hierarchy.py @@ -29,7 +29,7 @@ def _hierarchy(): def test_custom_fmg_hierarchy_converges(): m0, coarse, fine = _hierarchy() s=_poisson(fine) - custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_poisson, builder="barycentric") + custom_mg.set_custom_fmg(s, coarse, builder="barycentric") s.solve() assert s.snes.getKSP().getPC().getType()=="mg" assert s.snes.getKSP().getPC().getMGLevels()==3 @@ -44,7 +44,7 @@ def test_bc_per_level_reduction_no_zero_columns(): m0, coarse, fine = _hierarchy() s=_poisson(fine); s._build(False,False,None); s.snes.setUp() h=custom_mg.CustomMGHierarchy(coarse+[fine], builder="barycentric") - Ps=h.build(s, _poisson) # raises if any transfer has zero columns + Ps=h.build(s) # raises if any transfer has zero columns assert len(Ps)==2 for P in Ps: Pc=P.getValuesCSR(); import scipy.sparse as sp @@ -54,7 +54,7 @@ def test_bc_per_level_reduction_no_zero_columns(): def test_rbf_builder_also_works(): m0, coarse, fine = _hierarchy() s=_poisson(fine) - custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_poisson, builder="rbf") + custom_mg.set_custom_fmg(s, coarse, builder="rbf") s.solve() assert s.snes.getKSP().getPC().getType()=="mg" assert s.snes.getConvergedReason()>0 diff --git a/tests/test_1017_custom_mg_stokes.py b/tests/test_1017_custom_mg_stokes.py index 5abe7b42..3ee4954e 100644 --- a/tests/test_1017_custom_mg_stokes.py +++ b/tests/test_1017_custom_mg_stokes.py @@ -46,16 +46,6 @@ def _walls(s): s.petsc_options["snes_type"] = "ksponly" -def _coarse_factory(mesh): - """Same-discretisation coarse-level Stokes (only its constrained section is - read by CustomMGHierarchy.build to derive the BC-reduced DOF map).""" - s = uw.systems.Stokes(mesh) - s.constitutive_model = uw.constitutive_models.ViscousFlowModel - s.constitutive_model.Parameters.shear_viscosity_0 = 1.0 - _walls(s) - return s - - def _solcx(mesh, eta_B=1e6): sol = A.SolCx(mesh, eta_A=1.0, eta_B=eta_B, x_c=0.5, n=1) s = uw.systems.Stokes(mesh) @@ -86,8 +76,7 @@ def test_custom_fmg_velocity_block_barycentric(): # custom-P geometric MG on the velocity block s, sol = _solcx(fine) - custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_coarse_factory, - builder="barycentric", field_id=0) + custom_mg.set_custom_fmg(s, coarse, builder="barycentric", field_id=0) s.solve() vksp = _vel_ksp(s) @@ -108,8 +97,7 @@ def test_custom_fmg_velocity_block_rbf(): """The RBF builder must also drive a converging velocity-block MG solve.""" coarse, fine = _hierarchy() s, sol = _solcx(fine) - custom_mg.set_custom_fmg(s, coarse, level_solver_factory=_coarse_factory, - builder="rbf", field_id=0) + custom_mg.set_custom_fmg(s, coarse, builder="rbf", field_id=0) s.solve() vksp = _vel_ksp(s) assert s.snes.getConvergedReason() > 0 From 2e2c4f942ddc33b467e74ea73a2f4e228cc95df7 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:37:48 +1000 Subject: [PATCH 07/15] docs(custom_mg): mark factory removal done (Phase 1, Step 2) Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index 19bbc72b..c64f396e 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -126,9 +126,8 @@ Landed (committed, tested — `test_1014/1015/1016`, 18 pass): - validated: scalar jump-coeff Poisson, 5-level (3 uniform + 2 SBR), 3 FMG iters vs GAMG 46. -**Current scope = experimental:** **serial**, and `set_custom_fmg` takes a -`level_solver_factory`. Scalar / single-field-vector **and** Stokes velocity-block -are now supported. +**Current scope = experimental:** **serial**. Scalar / single-field-vector **and** +Stokes velocity-block are supported; the throwaway-solver factory has been removed. Remaining hardening **before** this is a merge-ready general feature (in order): 1. ~~**Stokes / saddle-point** (velocity-block injection).~~ **DONE** (2026-06-29). @@ -146,10 +145,16 @@ Remaining hardening **before** this is a merge-ready general feature (in order): rigid-body modes on `A_vv` + coarse ops are **not yet** handled (reusing `_attach_stokes_nullspace` covers the SolCx pressure-nullspace case only) — a Phase-2.5 follow-up. -2. **Drop the factory** — derive per-level constrained DOFs from boundary labels + - BC components (no throwaway solver) for a clean API. -3. **Parallel (np>1)** — `_reduced_map` is serial; validate co-located rank-local - transfers (fast-follow). +2. ~~**Drop the factory**.~~ **DONE** (2026-06-29). Each coarse level's + BC-constrained reduced map is derived directly from its DM via + `_coarse_reduced_map`: clone the coarse DM, `copyFields` + `copyDS` from the + finest solver, `createDS`. The DS carries UW's exact essential-BC definitions + and is topology-independent, so it constrains any coarse mesh sharing the + solver's boundary labels — validated byte-identical to the old factory path, + leak-free (no SNES / JIT). `set_custom_fmg` / `build` no longer take a + `level_solver_factory`. +3. **Parallel (np>1)** — `_reduced_map` / `_coarse_reduced_map` are serial (local + indices); validate co-located rank-local transfers (fast-follow). ## Explicit non-goals for this pass - Knockout (shown to pay full-fine assembly; structural value only) — not pursued now. From ecabfed08617ffc99e00a235fcaafbfd26f39d95 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:43:23 +1000 Subject: [PATCH 08/15] feat(custom_mg): loud serial-only guard for np>1 (Phase 1, Step 3 partial) The reduced maps use rank-local DOF indices and the prolongations assemble as serial AIJ, so at np>1 custom-P would silently build wrong P (or hit a cryptic broadcast error). Add _require_serial: set_custom_fmg and inject_custom_mg now raise a clear NotImplementedError on >1 ranks, pointing to preconditioner='fmg'/'gamg'. Serial behaviour unchanged. Parallel test tests/parallel/test_1017_custom_mg_serial_guard_mpi.py (np=2) asserts both the set-time and legacy solve-time guards fire. Full parallel custom-P (nested co-partitioned, rank-local point location + ghosting, MPIAIJ assembly with global-section reduction) remains a designed fast-follow. Underworld development team with AI support from Claude Code --- src/underworld3/utilities/custom_mg.py | 16 ++++++ .../test_1017_custom_mg_serial_guard_mpi.py | 52 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/parallel/test_1017_custom_mg_serial_guard_mpi.py diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index 611638f6..11c4a122 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -103,6 +103,20 @@ def phi(r): _BUILDERS = {"barycentric": barycentric_prolongation, "rbf": rbf_prolongation} +def _require_serial(where): + """Custom-P transfers are serial-only (experimental). The reduced maps use + rank-local DOF indices and the prolongations assemble as serial AIJ; at np>1 + they would silently build wrong P / mis-numbered transfers. Parallel support + (nested co-partitioned, rank-local P + MPIAIJ, global-section reduction) is a + designed fast-follow — until then, fail loudly rather than wrong.""" + from underworld3 import mpi + if mpi.size > 1: + raise NotImplementedError( + f"custom_mg ({where}): custom-P geometric MG is serial-only " + f"(running on {mpi.size} ranks). Parallel (np>1) support is not yet " + f"implemented; use preconditioner='fmg' (nested) or 'gamg' in parallel.") + + # --------------------------------------------------------------------------- # # Local Skeleton-Based Refinement (no MMG; on-rank; conforming) # --------------------------------------------------------------------------- # @@ -478,6 +492,7 @@ def set_custom_fmg(solver, coarse_meshes, *, builder="barycentric", (``_coarse_reduced_map``), so ``coarse_meshes`` need only carry the same boundary labels as the solver's mesh. For a saddle-point (Stokes) solver pass ``field_id=0`` to target the velocity sub-block.""" + _require_serial("set_custom_fmg") solver._custom_mg = { "mode": "hierarchy", "hierarchy": CustomMGHierarchy(list(coarse_meshes) + [solver.mesh], @@ -493,6 +508,7 @@ def inject_custom_mg(solver): - ``mode == "hierarchy"`` -> BC-per-level reduced path (correct, general); - legacy dict ``{coarse_meshes, kind}`` -> finest-only reduction (kept for back-compat; valid only when coarse levels are non-nested / unconstrained).""" + _require_serial("inject_custom_mg") cfg = solver._custom_mg if isinstance(cfg, dict) and cfg.get("mode") == "hierarchy": diff --git a/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py b/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py new file mode 100644 index 00000000..f9862e65 --- /dev/null +++ b/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py @@ -0,0 +1,52 @@ +"""Parallel guard for custom-P geometric MG (custom_mg). + +Custom-P transfers are serial-only (experimental): the reduced maps use rank-local +DOF indices and the prolongations assemble as serial AIJ, so at np>1 they would +silently build wrong P. set_custom_fmg / inject must therefore FAIL LOUDLY in +parallel rather than produce incorrect results. Parallel (np>1) support is a +designed fast-follow (nested co-partitioned, rank-local P + MPIAIJ). + +Run: + mpirun -n 2 python -m pytest --with-mpi tests/parallel/test_1017_custom_mg_serial_guard_mpi.py +""" +import pytest +import sympy +import underworld3 as uw +from underworld3.utilities import custom_mg + +pytestmark = [pytest.mark.mpi(min_size=2), pytest.mark.timeout(120)] + + +def _box(cellSize, refinement=None): + return uw.meshing.UnstructuredSimplexBox( + minCoords=(0.0, 0.0), maxCoords=(1.0, 1.0), + cellSize=cellSize, refinement=refinement, qdegree=2) + + +def _poisson(mesh): + p = uw.systems.Poisson(mesh) + p.constitutive_model = uw.constitutive_models.DiffusionModel + p.constitutive_model.Parameters.diffusivity = 1 + p.f = 0.0 + p.add_dirichlet_bc(0.0, "Bottom"); p.add_dirichlet_bc(1.0, "Top") + return p + + +@pytest.mark.mpi(min_size=2) +def test_set_custom_fmg_raises_in_parallel(): + """set_custom_fmg must raise NotImplementedError (not silently mis-build) np>1.""" + assert uw.mpi.size > 1 + s = _poisson(_box(0.25, refinement=2)) + with pytest.raises(NotImplementedError): + custom_mg.set_custom_fmg(s, [_box(0.285), _box(0.142)], builder="barycentric") + + +@pytest.mark.mpi(min_size=2) +def test_inject_guard_blocks_legacy_path_in_parallel(): + """The legacy set_custom_mg path must also fail loudly at solve() in parallel.""" + assert uw.mpi.size > 1 + s = _poisson(_box(0.25, refinement=2)) + # legacy setter writes _custom_mg directly (no module guard); inject must catch it + s.set_custom_mg([_box(0.285), _box(0.142)], kind="barycentric") + with pytest.raises(NotImplementedError): + s.solve() From caff202014876e8bfb53b8f1d94a5a80ef4068de Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 13:43:42 +1000 Subject: [PATCH 09/15] docs(custom_mg): record np>1 serial guard (Phase 1, Step 3) Underworld development team with AI support from Claude Code --- .../design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index c64f396e..901ff5c9 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -154,7 +154,13 @@ Remaining hardening **before** this is a merge-ready general feature (in order): leak-free (no SNES / JIT). `set_custom_fmg` / `build` no longer take a `level_solver_factory`. 3. **Parallel (np>1)** — `_reduced_map` / `_coarse_reduced_map` are serial (local - indices); validate co-located rank-local transfers (fast-follow). + indices) and `P` assembles as serial AIJ. **Guarded** (2026-06-29): + `set_custom_fmg` / `inject_custom_mg` raise a clear `NotImplementedError` at + np>1 (`_require_serial`) so parallel cannot silently mis-build — test + `tests/parallel/test_1017_custom_mg_serial_guard_mpi.py`. The full + parallel-correct implementation (nested co-partitioned, rank-local point + location + ghost layer, MPIAIJ assembly with global-section reduction) remains + the designed fast-follow. ## Explicit non-goals for this pass - Knockout (shown to pay full-fine assembly; structural value only) — not pursued now. From 19b778c783aaa590f237e9fa1ae885500311a6d9 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 14:03:51 +1000 Subject: [PATCH 10/15] feat(custom_mg): parallel (np>1) custom-P FMG, nested co-partitioned (Phase 1, Step 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hierarchy path now builds parallel-correct transfers. Each rank builds its block of P rank-locally: ghost-inclusive coarse coords mean every owned fine node lands in a local coarse simplex (verified 0 misses np=2/4), and the reduced global numbering rides the DM global section via a clean local->global-reduced map (_level_dof_layout scatters owned global indices out through globalToLocal; constrained DOFs stay -1, ghosts resolve to the owner's global index). Transfers assemble as MPIAIJ (owned fine rows, global coarse cols incl. off-rank); constrained coarse DOFs drop -> reduced->reduced. Parallel zero-column guard via P^T·1 + allreduce. _coarse_dof_layout reuses the leak-free copyDS trick for coarse levels. CustomMGHierarchy.build branches serial (scipy CSR) vs parallel (MPIAIJ). The guard now blocks only the legacy finest-only path (still serial). Validated np=1/2/4: scalar Poisson custom-P 4 iters and Stokes SolCx velocity block 6 iters, both matching the GAMG reference (and each other across rank counts). New tests/parallel/test_1017_custom_mg_parallel_mpi.py (scalar + Stokes correctness + legacy serial-guard). Serial 20 unchanged. Underworld development team with AI support from Claude Code --- src/underworld3/utilities/custom_mg.py | 150 +++++++++++++++--- .../test_1017_custom_mg_parallel_mpi.py | 113 +++++++++++++ .../test_1017_custom_mg_serial_guard_mpi.py | 52 ------ 3 files changed, 243 insertions(+), 72 deletions(-) create mode 100644 tests/parallel/test_1017_custom_mg_parallel_mpi.py delete mode 100644 tests/parallel/test_1017_custom_mg_serial_guard_mpi.py diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index 11c4a122..f8863940 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -278,6 +278,99 @@ def _reduced_transfer(coarse_coords, fine_coords, r2f_c, r2f_f, ncomp, builder): return Pr +# --------------------------------------------------------------------------- # +# Parallel (np>1) — nested co-partitioned, rank-local P + MPIAIJ +# +# Supported path: the levels are co-partitioned (uniform refine() / on-rank SBR +# keep a fine cell's parent coarse cell on the same rank), so each rank builds +# its block of P from its LOCAL (ghost-inclusive) coarse coords — point-location +# is rank-local. The reduced global numbering rides the DM global section. +# --------------------------------------------------------------------------- # +def _level_dof_layout(dm, field_id=None): + """Parallel DOF layout for one level: ``(l2g, rstart, rend, n_full)``. + + ``l2g[i]`` is the GLOBAL reduced index of local DOF ``i`` — ghost-resolved to + the owner's global index, and ``-1`` for a BC-constrained DOF. Built by + scattering each owned global index out to the local (incl. ghost) layout via + ``globalToLocal`` (constrained local DOFs have no global source, so they keep + the pre-set ``-1``). ``[rstart, rend)`` is this rank's owned global range.""" + sub = _field_subdm(dm, field_id) + gv = sub.getGlobalVec() + lv = sub.getLocalVec() + rstart, rend = gv.getOwnershipRange() + gv.array[:] = np.arange(rstart, rend, dtype=float) + lv.set(-1.0) + sub.globalToLocal(gv, lv, addv=PETSc.InsertMode.INSERT_VALUES) + l2g = np.rint(lv.array).astype(np.int64).copy() + n_full = lv.getLocalSize() + sub.restoreGlobalVec(gv) + sub.restoreLocalVec(lv) + return l2g, rstart, rend, n_full + + +def _coarse_dof_layout(solver, coarse_mesh, field_id=None): + """Parallel coarse-level DOF layout, no throwaway solver — same copyDS trick + as :func:`_coarse_reduced_map` but returning the parallel ``l2g`` layout.""" + cdm = coarse_mesh.dm.clone() + solver.dm.copyFields(cdm) + solver.dm.copyDS(cdm) + cdm.createDS() + return _level_dof_layout(cdm, field_id) + + +def _build_parallel_transfer(cc, fc, lay_c, lay_f, ncomp, builder, comm): + """One reduced->reduced prolongation as an MPIAIJ matrix. + + Node-level barycentric/RBF weights are built rank-locally (coarse LOCAL coords + incl. ghosts -> every owned fine node lands in a local coarse simplex). Each + fine OWNED DOF becomes a global row; its coarse contributions map through the + coarse ``l2g`` to global columns (off-rank columns are fine for MPIAIJ). + Constrained coarse DOFs (``l2g == -1``) drop out -> reduced->reduced.""" + l2g_c, cstart, cend, _ = lay_c + l2g_f, fstart, fend, _ = lay_f + Pn = builder(cc, fc).tocsr() # (n_f_nodes, n_c_nodes), local + nloc_f = fend - fstart + nloc_c = cend - cstart + + P = PETSc.Mat().create(comm=comm) + P.setSizes(((nloc_f, None), (nloc_c, None))) + P.setType("aij") + P.setUp() + for i in range(Pn.shape[0]): # fine local node + js = Pn.indices[Pn.indptr[i]:Pn.indptr[i + 1]] + ws = Pn.data[Pn.indptr[i]:Pn.indptr[i + 1]] + for c in range(ncomp): + grow = int(l2g_f[i * ncomp + c]) + if grow < fstart or grow >= fend: # set OWNED rows only + continue + gcols, vals = [], [] + for jj, w in zip(js.tolist(), ws.tolist()): + gcol = int(l2g_c[jj * ncomp + c]) + if gcol >= 0: # skip constrained coarse DOFs + gcols.append(gcol) + vals.append(w) + if gcols: + P.setValues([grow], gcols, vals, addv=PETSc.InsertMode.INSERT_VALUES) + P.assemble() + return P + + +def _assert_no_zero_columns_parallel(P, comm): + """Parallel zero-column guard: a coarse DOF with no fine image -> singular + Galerkin coarse operator. Column sums via P^T·1 (weights are positive, so a + zero sum means an empty column).""" + ones_f = P.createVecLeft(); ones_f.set(1.0) + colsum = P.createVecRight() + P.multTranspose(ones_f, colsum) + nzero_local = int((colsum.array == 0.0).sum()) + nzero = comm.tompi4py().allreduce(nzero_local) + ones_f.destroy(); colsum.destroy() + if nzero: + raise RuntimeError( + f"parallel transfer has {nzero} zero columns (coarse DOFs with no fine " + f"image) — BC-per-level reduction failed; coarse operator would be singular.") + + def _configure_pcmg(pc, Ps): """Reconfigure ``pc`` as a fresh PCMG (FMG F-cycle) driven by the supplied reduced->reduced prolongations ``Ps``, Galerkin RAP for coarse operators. @@ -434,25 +527,36 @@ def build(self, solver): """Build the BC-reduced prolongations. ``solver`` is the (built) finest solver. Each COARSE level's BC-constrained reduced map is derived directly from the coarse mesh DM by copying the finest solver's fields + DS onto it - (``_coarse_reduced_map``) — no throwaway solver. The finest level reads its - map from ``solver.dm``.""" + (no throwaway solver); the finest level reads its map from ``solver.dm``. + + Serial: scipy reduced->reduced CSR. Parallel (np>1): rank-local node-level + weights assembled into MPIAIJ transfers with global-section reduced + numbering (nested co-partitioned path).""" + from underworld3 import mpi var = solver.Unknowns.u degree = var.degree continuous = getattr(var, "continuous", True) nlev = len(self.level_meshes) + parallel = mpi.size > 1 - coords, r2f, ncomp = [], [], [] + coords, maps, ncomp = [], [], [] for k, mesh in enumerate(self.level_meshes): c = np.asarray(mesh._get_coords_for_basis(degree, continuous)) - if k == nlev - 1: - rmap, nfull = _reduced_map(solver.dm, self.field_id) + finest = (k == nlev - 1) + if parallel: + lay = (_level_dof_layout(solver.dm, self.field_id) if finest + else _coarse_dof_layout(solver, mesh, self.field_id)) + nfull = lay[3] + maps.append(lay) else: - rmap, nfull = _coarse_reduced_map(solver, mesh, self.field_id) + rmap, nfull = (_reduced_map(solver.dm, self.field_id) if finest + else _coarse_reduced_map(solver, mesh, self.field_id)) + maps.append(rmap) nc = nfull // c.shape[0] if nfull % c.shape[0] != 0: raise RuntimeError( f"level {k}: full DOFs {nfull} not divisible by nodes {c.shape[0]}") - coords.append(c); r2f.append(rmap); ncomp.append(nc) + coords.append(c); ncomp.append(nc) if len(set(ncomp)) != 1: raise RuntimeError(f"inconsistent component counts across levels: {ncomp}") @@ -460,15 +564,22 @@ def build(self, solver): Ps = [] for l in range(1, nlev): - Pr = _reduced_transfer(coords[l - 1], coords[l], r2f[l - 1], r2f[l], - nc, self.builder) - zc = int((np.asarray((Pr != 0).sum(axis=0)).ravel() == 0).sum()) - if zc: - raise RuntimeError( - f"transfer {l-1}->{l} has {zc} zero columns (coarse DOFs with no " - f"fine image) — BC-per-level reduction failed; coarse operator " - f"would be singular.") - Ps.append(_to_petsc_aij(Pr)) + if parallel: + P = _build_parallel_transfer(coords[l - 1], coords[l], + maps[l - 1], maps[l], nc, + self.builder, solver.dm.comm) + _assert_no_zero_columns_parallel(P, solver.dm.comm) + Ps.append(P) + else: + Pr = _reduced_transfer(coords[l - 1], coords[l], maps[l - 1], + maps[l], nc, self.builder) + zc = int((np.asarray((Pr != 0).sum(axis=0)).ravel() == 0).sum()) + if zc: + raise RuntimeError( + f"transfer {l-1}->{l} has {zc} zero columns (coarse DOFs with " + f"no fine image) — BC-per-level reduction failed; coarse " + f"operator would be singular.") + Ps.append(_to_petsc_aij(Pr)) self.transfers = Ps return Ps @@ -492,7 +603,6 @@ def set_custom_fmg(solver, coarse_meshes, *, builder="barycentric", (``_coarse_reduced_map``), so ``coarse_meshes`` need only carry the same boundary labels as the solver's mesh. For a saddle-point (Stokes) solver pass ``field_id=0`` to target the velocity sub-block.""" - _require_serial("set_custom_fmg") solver._custom_mg = { "mode": "hierarchy", "hierarchy": CustomMGHierarchy(list(coarse_meshes) + [solver.mesh], @@ -508,16 +618,16 @@ def inject_custom_mg(solver): - ``mode == "hierarchy"`` -> BC-per-level reduced path (correct, general); - legacy dict ``{coarse_meshes, kind}`` -> finest-only reduction (kept for back-compat; valid only when coarse levels are non-nested / unconstrained).""" - _require_serial("inject_custom_mg") cfg = solver._custom_mg if isinstance(cfg, dict) and cfg.get("mode") == "hierarchy": h = cfg["hierarchy"] - h.build(solver) + h.build(solver) # parallel-capable (nested co-partitioned) h.install(solver, verbose=cfg.get("verbose", False)) return - # ---- legacy finest-only path (back-compat) ------------------------------ + # ---- legacy finest-only path (back-compat, serial only) ----------------- + _require_serial("legacy custom_mg (set_custom_mg)") coarse_meshes = cfg["coarse_meshes"] builder = _BUILDERS[cfg["kind"]] var = solver.Unknowns.u diff --git a/tests/parallel/test_1017_custom_mg_parallel_mpi.py b/tests/parallel/test_1017_custom_mg_parallel_mpi.py new file mode 100644 index 00000000..e57f2c3b --- /dev/null +++ b/tests/parallel/test_1017_custom_mg_parallel_mpi.py @@ -0,0 +1,113 @@ +"""Parallel (np>1) correctness for custom-P geometric MG (custom_mg). + +The nested co-partitioned hierarchy path is parallel-capable: rank-local point +location (ghost-inclusive coarse coords) + global-section reduced numbering + +MPIAIJ transfers. These tests assert that, on >1 ranks, custom-P drives a +converging geometric-MG solve whose result matches a GAMG reference — for both a +scalar Poisson and the Stokes (SolCx) velocity block. The legacy finest-only +path remains serial-only and must still fail loudly. + +Run: + mpirun -n 2 python -m pytest --with-mpi tests/parallel/test_1017_custom_mg_parallel_mpi.py +""" +import numpy as np +import pytest +import underworld3 as uw +from underworld3.function import analytic as A +from underworld3.utilities import custom_mg + +pytestmark = [pytest.mark.mpi(min_size=2), pytest.mark.timeout(180)] + + +def _wrap(dm, m0, q=2): + return uw.discretisation.Mesh( + dm.clone(), simplex=True, + coordinate_system_type=m0.CoordinateSystem.coordinate_type, + qdegree=q, boundaries=m0.boundaries) + + +def _box(cellSize, refinement=None, q=2): + return uw.meshing.UnstructuredSimplexBox( + minCoords=(0., 0.), maxCoords=(1., 1.), + cellSize=cellSize, regular=True, refinement=refinement, qdegree=q) + + +def _rel_l2(a, b): + """Global relative L2 difference of two field-data arrays (rank-reduced).""" + da = np.asarray(a) - np.asarray(b) + num = uw.mpi.comm.allreduce(float(np.sum(da**2))) + den = uw.mpi.comm.allreduce(float(np.sum(np.asarray(b)**2))) + return (num**0.5) / (den**0.5 + 1e-30) + + +def _poisson(mesh): + p = uw.systems.Poisson(mesh) + p.constitutive_model = uw.constitutive_models.DiffusionModel + p.constitutive_model.Parameters.diffusivity = 1 + p.f = 0.0 + p.add_dirichlet_bc(0.0, "Bottom"); p.add_dirichlet_bc(1.0, "Top") + p.petsc_options["ksp_rtol"] = 1e-8; p.petsc_options["ksp_type"] = "cg" + return p + + +@pytest.mark.mpi(min_size=2) +def test_parallel_custom_fmg_scalar(): + """Scalar custom-P drives a converging geometric MG solve in parallel and + matches a GAMG solve of the same problem.""" + assert uw.mpi.size > 1 + m0 = _box(0.25); dm1 = m0.dm.refine(); dm2 = dm1.refine() + coarse = [_wrap(m0.dm, m0), _wrap(dm1, m0)]; fine = _wrap(dm2, m0) + + g = _poisson(_wrap(dm2, m0)); g.preconditioner = "gamg"; g.solve() + c = _poisson(fine) + custom_mg.set_custom_fmg(c, coarse, builder="barycentric") + c.solve() + + assert c.snes.getKSP().getPC().getType() == "mg" + assert c.snes.getConvergedReason() > 0 + assert _rel_l2(c.Unknowns.u.data, g.Unknowns.u.data) < 1e-4 + + +@pytest.mark.mpi(min_size=2) +def test_parallel_custom_fmg_stokes_velocity_block(): + """Custom-P on the SolCx velocity block converges in few MG iters in parallel + and matches the analytic velocity to the GAMG reference's accuracy.""" + assert uw.mpi.size > 1 + fine = _box(0.25, refinement=2, q=3) + coarse = [_wrap(d, fine, q=3) for d in fine.dm_hierarchy[:-1]] + + def solcx(mesh): + sol = A.SolCx(mesh, eta_A=1.0, eta_B=1e6, x_c=0.5, n=1) + s = uw.systems.Stokes(mesh) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = sol.fn_viscosity + s.saddle_preconditioner = 1.0 / sol.fn_viscosity + s.bodyforce = sol.fn_bodyforce + s.add_dirichlet_bc((0.0, None), "Left"); s.add_dirichlet_bc((0.0, None), "Right") + s.add_dirichlet_bc((None, 0.0), "Bottom"); s.add_dirichlet_bc((None, 0.0), "Top") + s.petsc_use_pressure_nullspace = True; s.tolerance = 1e-8 + s.petsc_options["snes_type"] = "ksponly" + return s, sol + + sg, solg = solcx(fine); sg.preconditioner = "gamg"; sg.solve() + sc, sol = solcx(fine) + custom_mg.set_custom_fmg(sc, coarse, builder="barycentric", field_id=0) + sc.solve() + vksp = sc.snes.getKSP().getPC().getFieldSplitSubKSP()[0] + + assert sc.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert vksp.getPC().getMGLevels() == len(coarse) + 1 + assert vksp.getIterationNumber() <= 15 # geometric MG, not GAMG (~200) + assert sol.velocity_error(sc.u) < 2.0 * solg.velocity_error(sg.u) + 1e-6 + + +@pytest.mark.mpi(min_size=2) +def test_legacy_path_serial_guard(): + """The legacy finest-only set_custom_mg path is serial-only and must raise + loudly in parallel (it has no parallel reduced map).""" + assert uw.mpi.size > 1 + s = _poisson(_box(0.25, refinement=2)) + s.set_custom_mg([_box(0.285), _box(0.142)], kind="barycentric") + with pytest.raises(NotImplementedError): + s.solve() diff --git a/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py b/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py deleted file mode 100644 index f9862e65..00000000 --- a/tests/parallel/test_1017_custom_mg_serial_guard_mpi.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Parallel guard for custom-P geometric MG (custom_mg). - -Custom-P transfers are serial-only (experimental): the reduced maps use rank-local -DOF indices and the prolongations assemble as serial AIJ, so at np>1 they would -silently build wrong P. set_custom_fmg / inject must therefore FAIL LOUDLY in -parallel rather than produce incorrect results. Parallel (np>1) support is a -designed fast-follow (nested co-partitioned, rank-local P + MPIAIJ). - -Run: - mpirun -n 2 python -m pytest --with-mpi tests/parallel/test_1017_custom_mg_serial_guard_mpi.py -""" -import pytest -import sympy -import underworld3 as uw -from underworld3.utilities import custom_mg - -pytestmark = [pytest.mark.mpi(min_size=2), pytest.mark.timeout(120)] - - -def _box(cellSize, refinement=None): - return uw.meshing.UnstructuredSimplexBox( - minCoords=(0.0, 0.0), maxCoords=(1.0, 1.0), - cellSize=cellSize, refinement=refinement, qdegree=2) - - -def _poisson(mesh): - p = uw.systems.Poisson(mesh) - p.constitutive_model = uw.constitutive_models.DiffusionModel - p.constitutive_model.Parameters.diffusivity = 1 - p.f = 0.0 - p.add_dirichlet_bc(0.0, "Bottom"); p.add_dirichlet_bc(1.0, "Top") - return p - - -@pytest.mark.mpi(min_size=2) -def test_set_custom_fmg_raises_in_parallel(): - """set_custom_fmg must raise NotImplementedError (not silently mis-build) np>1.""" - assert uw.mpi.size > 1 - s = _poisson(_box(0.25, refinement=2)) - with pytest.raises(NotImplementedError): - custom_mg.set_custom_fmg(s, [_box(0.285), _box(0.142)], builder="barycentric") - - -@pytest.mark.mpi(min_size=2) -def test_inject_guard_blocks_legacy_path_in_parallel(): - """The legacy set_custom_mg path must also fail loudly at solve() in parallel.""" - assert uw.mpi.size > 1 - s = _poisson(_box(0.25, refinement=2)) - # legacy setter writes _custom_mg directly (no module guard); inject must catch it - s.set_custom_mg([_box(0.285), _box(0.142)], kind="barycentric") - with pytest.raises(NotImplementedError): - s.solve() From af8e734b431da181cf8d5f208323678227937c30 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 14:16:37 +1000 Subject: [PATCH 11/15] docs(custom_mg): record parallel np>1 done (Phase 1, Step 3) Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index 901ff5c9..2cb670fc 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -120,16 +120,21 @@ non-nested parallel transfers; knockout-as-coarsening as an alternative Layer-2 (nested *or* non-nested, uniform *or* SBR-refined) → geometric FMG via custom-P with BC-per-level reduction. It does not depend on Layer 2. -Landed (committed, tested — `test_1014/1015/1016`, 18 pass): +Landed (committed, tested — `test_1014/1015/1016/1017` serial 20 + +`tests/parallel/test_1017_custom_mg_parallel_mpi.py` np2): - `CustomMGHierarchy`, `set_custom_fmg`, `sbr_refine`/`sbr_refine_where`; - automatic BC-per-level reduction + zero-column guard; -- validated: scalar jump-coeff Poisson, 5-level (3 uniform + 2 SBR), 3 FMG iters - vs GAMG 46. +- Stokes velocity-block injection; leak-free per-level reduction (copyDS, no + factory); parallel (np>1) nested co-partitioned transfers; +- validated: scalar jump-coeff Poisson 5-level (3 uniform + 2 SBR) 3 FMG iters + vs GAMG 46; SolCx velocity block 6 iters vs GAMG ~198, np=1/2/4. -**Current scope = experimental:** **serial**. Scalar / single-field-vector **and** -Stokes velocity-block are supported; the throwaway-solver factory has been removed. +**Current scope:** scalar / single-field-vector **and** Stokes velocity-block; +**serial and parallel** (nested co-partitioned). Non-nested custom-P is serial-only +(experimental). Hardening steps 1–3 are complete; what remains is test +tier-classification + undrafting PR #290. -Remaining hardening **before** this is a merge-ready general feature (in order): +Hardening steps (all complete as of 2026-06-29): 1. ~~**Stokes / saddle-point** (velocity-block injection).~~ **DONE** (2026-06-29). `set_custom_fmg(..., field_id=0)` drives custom-P geometric MG on the velocity sub-block. The sub-PC is unreachable until the monolithic Jacobian is assembled, @@ -153,14 +158,21 @@ Remaining hardening **before** this is a merge-ready general feature (in order): solver's boundary labels — validated byte-identical to the old factory path, leak-free (no SNES / JIT). `set_custom_fmg` / `build` no longer take a `level_solver_factory`. -3. **Parallel (np>1)** — `_reduced_map` / `_coarse_reduced_map` are serial (local - indices) and `P` assembles as serial AIJ. **Guarded** (2026-06-29): - `set_custom_fmg` / `inject_custom_mg` raise a clear `NotImplementedError` at - np>1 (`_require_serial`) so parallel cannot silently mis-build — test - `tests/parallel/test_1017_custom_mg_serial_guard_mpi.py`. The full - parallel-correct implementation (nested co-partitioned, rank-local point - location + ghost layer, MPIAIJ assembly with global-section reduction) remains - the designed fast-follow. +3. ~~**Parallel (np>1)**.~~ **DONE** (2026-06-29). The hierarchy path builds + parallel-correct transfers on the nested co-partitioned hierarchy: each rank + builds its block of `P` rank-locally (ghost-inclusive coarse coords → every + owned fine node lands in a local coarse simplex, verified 0 misses np=2/4), + the reduced global numbering rides the DM global section + (`_level_dof_layout` scatters owned global indices out via `globalToLocal` — + constrained DOFs `-1`, ghosts resolve to the owner's global), and transfers + assemble as MPIAIJ (owned fine rows, global coarse cols incl. off-rank; + constrained coarse DOFs drop → reduced→reduced). Parallel zero-column guard + via `Pᵀ·1` + allreduce. Validated np=1/2/4: scalar Poisson 4 iters + Stokes + SolCx velocity block 6 iters, matching the GAMG reference and each other + across rank counts (`tests/parallel/test_1017_custom_mg_parallel_mpi.py`). The + **legacy** finest-only path (`set_custom_mg` / `_reduce_to_global`) stays + serial-only and raises loudly at np>1. Non-nested custom-P in parallel remains + out of scope (cross-rank point location). ## Explicit non-goals for this pass - Knockout (shown to pay full-fine assembly; structural value only) — not pursued now. From 3686b7b84ca2491b848a2b22335922f3d36e28cd Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 14:43:10 +1000 Subject: [PATCH 12/15] feat(custom_mg): cover all solver families (SNES_Vector hook; Vector + Constrained tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify custom-P works for every solver family that consumes the mesh, so a mesh-owned adapted hierarchy drives MG uniformly: - SNES_Vector.solve was MISSING the inject_custom_mg hook (only SNES_Scalar and SNES_Stokes_SaddlePt had it) — so Vector_Projection silently stayed on GAMG. Add the hook (mirrors the scalar one). All 8 solver-subclass solve() overrides delegate to a base solve(), so this completes coverage: scalar branch -> Poisson/Darcy/Projection/AdvDiff/Diffusion; vector branch -> Vector_Projection; velocity-block -> Stokes/VE/Constrained/NS. - test_1017: add SNES_Vector (Vector_Projection, top-level vector PC, field_id=None) and Stokes_Constrained (free-slip multipliers, grouped [p,h] split, field_id=0) serial tests — both drive custom-P 'mg', constrained matches analytic SolCx to 4.4e-4. - parallel test: add Vector (passes np=2) and Constrained. The Constrained parallel test is SKIPPED: Stokes_Constrained is not parallel-safe yet — it segfaults at np>1 independently of custom-P (canonical test_1062_constrained_solcx also segfaults at np=2 under plain GAMG). It auto-enables when the constrained solver becomes parallel-ready. Serial: custom_mg 22 + projections + constrained green. Parallel np=2: scalar + Stokes-velocity + vector + legacy-guard pass. Underworld development team with AI support from Claude Code --- .../cython/petsc_generic_snes_solvers.pyx | 6 ++ .../test_1017_custom_mg_parallel_mpi.py | 59 ++++++++++++++++++ tests/test_1017_custom_mg_stokes.py | 61 +++++++++++++++++-- 3 files changed, 121 insertions(+), 5 deletions(-) diff --git a/src/underworld3/cython/petsc_generic_snes_solvers.pyx b/src/underworld3/cython/petsc_generic_snes_solvers.pyx index 58f21ab1..7c680a88 100644 --- a/src/underworld3/cython/petsc_generic_snes_solvers.pyx +++ b/src/underworld3/cython/petsc_generic_snes_solvers.pyx @@ -3724,6 +3724,12 @@ class SNES_Vector(SolverBaseClass): # Update constants (e.g. changed material params) before solve self._update_constants() + # Custom geometric-MG prolongation on the (top-level vector) PC, if + # registered via set_custom_fmg. Mirrors the SNES_Scalar hook. + if self._custom_mg is not None: + from underworld3.utilities.custom_mg import inject_custom_mg + inject_custom_mg(self) + # solve self._snes_solve_with_retries(gvec, divergence_retries, verbose) diff --git a/tests/parallel/test_1017_custom_mg_parallel_mpi.py b/tests/parallel/test_1017_custom_mg_parallel_mpi.py index e57f2c3b..0437a59e 100644 --- a/tests/parallel/test_1017_custom_mg_parallel_mpi.py +++ b/tests/parallel/test_1017_custom_mg_parallel_mpi.py @@ -102,6 +102,65 @@ def solcx(mesh): assert sol.velocity_error(sc.u) < 2.0 * solg.velocity_error(sg.u) + 1e-6 +@pytest.mark.mpi(min_size=2) +def test_parallel_custom_fmg_vector(): + """SNES_Vector (Vector_Projection) custom-P drives a top-level vector PCMG in + parallel (field_id=None, ncomp=dim). Fine mesh has NO native hierarchy, so the + 'mg' PC can only come from our custom-P.""" + import sympy + assert uw.mpi.size > 1 + m0 = _box(0.25); dm2 = m0.dm.refine().refine() + coarse = [_wrap(m0.dm, m0), _wrap(m0.dm.refine(), m0)] + fine = _wrap(dm2, m0) + x, y = fine.X + v = uw.discretisation.MeshVariable("Ucpp", fine, fine.dim, degree=2) + proj = uw.systems.Vector_Projection(fine, v) + proj.uw_function = sympy.Matrix([[sympy.sin(sympy.pi * x) * sympy.cos(sympy.pi * y), + sympy.cos(sympy.pi * x) * sympy.sin(sympy.pi * y)]]) + proj.smoothing = 1.0e-3 + proj.add_dirichlet_bc((0.0, 0.0), "Bottom") + proj.add_dirichlet_bc((0.0, 0.0), "Top") + custom_mg.set_custom_fmg(proj, coarse, builder="barycentric") + proj.solve() + assert proj.snes.getKSP().getPC().getType() == "mg" + assert proj.snes.getConvergedReason() > 0 + + +@pytest.mark.skip(reason="Stokes_Constrained is not parallel-safe yet — it " + "segfaults at np>1 independently of custom-P (the canonical " + "test_1062_constrained_solcx also segfaults at np=2, plain GAMG). " + "custom-P on the constrained velocity block works in SERIAL " + "(see test_1017_custom_mg_stokes.test_custom_fmg_stokes_constrained); " + "this auto-enables once the constrained solver is parallel-ready.") +@pytest.mark.mpi(min_size=2) +def test_parallel_custom_fmg_stokes_constrained(): + """Stokes_Constrained (free-slip multipliers, grouped [p,h] split) custom-P on + the velocity block in parallel — velocity must match the analytic SolCx.""" + import sympy + assert uw.mpi.size > 1 + m0 = _box(0.25, q=3); dm2 = m0.dm.refine().refine() + coarse = [_wrap(m0.dm, m0, q=3), _wrap(m0.dm.refine(), m0, q=3)] + fine = _wrap(dm2, m0, q=3) + sol = A.SolCx(fine, eta_A=1.0, eta_B=1.0e6, x_c=0.5, n=1) + s = uw.systems.Stokes_Constrained(fine) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = sol.fn_viscosity + s.bodyforce = sol.fn_bodyforce + s.add_constraint_bc("Left", g=0.0, normal=sympy.Matrix([[-1.0, 0.0]])) + s.add_constraint_bc("Right", g=0.0, normal=sympy.Matrix([[1.0, 0.0]])) + s.add_constraint_bc("Bottom", g=0.0, normal=sympy.Matrix([[0.0, -1.0]])) + s.add_constraint_bc("Top", g=0.0, normal=sympy.Matrix([[0.0, 1.0]])) + s.petsc_use_pressure_nullspace = True + s.tolerance = 1.0e-9 + s.petsc_options["snes_type"] = "ksponly" + custom_mg.set_custom_fmg(s, coarse, builder="barycentric", field_id=0) + s.solve() + vksp = s.snes.getKSP().getPC().getFieldSplitSubKSP()[0] + assert s.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert sol.velocity_error(s.u) < 5.0e-3 + + @pytest.mark.mpi(min_size=2) def test_legacy_path_serial_guard(): """The legacy finest-only set_custom_mg path is serial-only and must raise diff --git a/tests/test_1017_custom_mg_stokes.py b/tests/test_1017_custom_mg_stokes.py index 3ee4954e..5046d71d 100644 --- a/tests/test_1017_custom_mg_stokes.py +++ b/tests/test_1017_custom_mg_stokes.py @@ -1,9 +1,12 @@ -"""Layer-1 generalized FMG hierarchy on the STOKES velocity block (Phase-1 Step 1). +"""Layer-1 generalized FMG hierarchy across the solver families that consume a mesh. -Custom-built prolongations (barycentric / RBF) drive geometric multigrid on the -velocity sub-block of the saddle-point solver, via set_custom_fmg(field_id=0). -Validated on SolCx (eta_B=1e6): the velocity block converges in a handful of MG -iterations (vs ~200 for GAMG) and the solution matches a GAMG reference. +Custom-built prolongations (barycentric / RBF) drive geometric multigrid via +set_custom_fmg, validated here on each solver-PC topology so the same adapted-mesh +hierarchy works for every solver running on it: + - STOKES velocity sub-block (set_custom_fmg field_id=0) — SolCx eta_B=1e6; + - SNES_Vector (Vector_Projection) — top-level vector PC (field_id=None); + - Stokes_Constrained (free-slip via in-saddle multipliers) — velocity block under + the grouped [p, h] fieldsplit. The hard part (see custom_mg._install_velocity_block_transfers): the velocity sub-PC is unreachable until the monolithic Jacobian is assembled, so the install @@ -105,3 +108,51 @@ def test_custom_fmg_velocity_block_rbf(): assert vksp.getPC().getMGLevels() == len(coarse) + 1 assert vksp.getIterationNumber() <= 25 assert sol.velocity_error(s.u) < 1e-2 + + +def test_custom_fmg_vector_solver(): + """SNES_Vector (Vector_Projection): custom-P on the top-level vector PC + (field_id=None) — the same scalar/single-field-vector path, ncomp=dim.""" + import sympy + coarse, fine = _hierarchy() + x, y = fine.X + v = uw.discretisation.MeshVariable("Ucp", fine, fine.dim, degree=2) + proj = uw.systems.Vector_Projection(fine, v) + proj.uw_function = sympy.Matrix([[sympy.sin(sympy.pi * x) * sympy.cos(sympy.pi * y), + sympy.cos(sympy.pi * x) * sympy.sin(sympy.pi * y)]]) + proj.smoothing = 1.0e-3 + proj.add_dirichlet_bc((0.0, 0.0), "Bottom") + proj.add_dirichlet_bc((0.0, 0.0), "Top") + custom_mg.set_custom_fmg(proj, coarse, builder="barycentric") # field_id=None + proj.solve() + assert proj.snes.getKSP().getPC().getType() == "mg" + assert proj.snes.getKSP().getPC().getMGLevels() == len(coarse) + 1 + assert proj.snes.getConvergedReason() > 0 + + +def test_custom_fmg_stokes_constrained(): + """Stokes_Constrained (free-slip via in-saddle multipliers): custom-P on the + velocity block under the grouped [p, h] fieldsplit. Velocity must still match + the analytic SolCx free-slip solution.""" + import sympy + coarse, fine = _hierarchy() + sol = A.SolCx(fine, eta_A=1.0, eta_B=1.0e6, x_c=0.5, n=1) + s = uw.systems.Stokes_Constrained(fine) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = sol.fn_viscosity + s.bodyforce = sol.fn_bodyforce + s.add_constraint_bc("Left", g=0.0, normal=sympy.Matrix([[-1.0, 0.0]])) + s.add_constraint_bc("Right", g=0.0, normal=sympy.Matrix([[1.0, 0.0]])) + s.add_constraint_bc("Bottom", g=0.0, normal=sympy.Matrix([[0.0, -1.0]])) + s.add_constraint_bc("Top", g=0.0, normal=sympy.Matrix([[0.0, 1.0]])) + s.petsc_use_pressure_nullspace = True + s.tolerance = 1.0e-9 + s.petsc_options["snes_type"] = "ksponly" + custom_mg.set_custom_fmg(s, coarse, builder="barycentric", field_id=0) + s.solve() + vksp = _vel_ksp(s) + assert s.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert vksp.getPC().getMGLevels() == len(coarse) + 1 + assert vksp.getIterationNumber() <= 15 + assert sol.velocity_error(s.u) < 5.0e-3 From 32fec5adb3584d54254d6b1f994c7acb6425e3f0 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 14:43:39 +1000 Subject: [PATCH 13/15] docs(custom_mg): solver-family coverage matrix + constrained-parallel limitation Underworld development team with AI support from Claude Code --- .../GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index 2cb670fc..8da85492 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -134,6 +134,23 @@ Landed (committed, tested — `test_1014/1015/1016/1017` serial 20 + (experimental). Hardening steps 1–3 are complete; what remains is test tier-classification + undrafting PR #290. +**Solver-family coverage** — the install keys off the PC topology, so one of two +branches covers every solver that consumes the mesh (all `solve()` overrides +delegate to a hooked base `solve()`; the `inject_custom_mg` hook lives in +`SNES_Scalar.solve`, `SNES_Vector.solve`, `SNES_Stokes_SaddlePt.solve`): + +| Solver family | Branch (`field_id`) | Serial | Parallel | +|---|---|---|---| +| `SNES_Scalar` (Poisson, Darcy, Projection, AdvDiff, Diffusion) | top-level (`None`) | ✓ | ✓ | +| `SNES_Vector` (Vector_Projection, displacement) | top-level (`None`) | ✓ | ✓ | +| `SNES_Stokes` / VE / NavierStokes | velocity block (`0`) | ✓ | ✓ | +| `SNES_Stokes_Constrained` (in-saddle multipliers) | velocity block (`0`) | ✓ | **skip** ¹ | + +¹ The constrained solver is **not parallel-safe today** — it segfaults at np>1 +*independently of custom-P* (the canonical `test_1062_constrained_solcx` also +segfaults at np=2 under plain GAMG). custom-P on the constrained velocity block +works in serial; the parallel test auto-enables once the solver is parallel-ready. + Hardening steps (all complete as of 2026-06-29): 1. ~~**Stokes / saddle-point** (velocity-block injection).~~ **DONE** (2026-06-29). `set_custom_fmg(..., field_id=0)` drives custom-P geometric MG on the velocity From d7a4ca17e0e78c5a233c89b008805a8ce559956b Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 15:01:36 +1000 Subject: [PATCH 14/15] docs(custom_mg): link constrained-parallel segfault to issue #291 Underworld development team with AI support from Claude Code --- .../design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md | 8 +++++--- tests/parallel/test_1017_custom_mg_parallel_mpi.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md index 8da85492..4380f1d0 100644 --- a/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md +++ b/docs/developer/design/GENERALIZED_FMG_HIERARCHY_AND_ADAPT.md @@ -147,9 +147,11 @@ delegate to a hooked base `solve()`; the `inject_custom_mg` hook lives in | `SNES_Stokes_Constrained` (in-saddle multipliers) | velocity block (`0`) | ✓ | **skip** ¹ | ¹ The constrained solver is **not parallel-safe today** — it segfaults at np>1 -*independently of custom-P* (the canonical `test_1062_constrained_solcx` also -segfaults at np=2 under plain GAMG). custom-P on the constrained velocity block -works in serial; the parallel test auto-enables once the solver is parallel-ready. +*independently of custom-P*, in the interior-multiplier section reduction +(`_constrain_interior_multipliers_in_section`; **issue #291**; the canonical +`test_1062_constrained_solcx` also segfaults at np=2 under plain GAMG; workaround +`_reduce_interior_multiplier = False`). custom-P on the constrained velocity block +works in serial; the parallel test auto-enables once #291 is fixed. Hardening steps (all complete as of 2026-06-29): 1. ~~**Stokes / saddle-point** (velocity-block injection).~~ **DONE** (2026-06-29). diff --git a/tests/parallel/test_1017_custom_mg_parallel_mpi.py b/tests/parallel/test_1017_custom_mg_parallel_mpi.py index 0437a59e..3a3e29a5 100644 --- a/tests/parallel/test_1017_custom_mg_parallel_mpi.py +++ b/tests/parallel/test_1017_custom_mg_parallel_mpi.py @@ -127,11 +127,12 @@ def test_parallel_custom_fmg_vector(): @pytest.mark.skip(reason="Stokes_Constrained is not parallel-safe yet — it " - "segfaults at np>1 independently of custom-P (the canonical " + "segfaults at np>1 independently of custom-P, in the " + "interior-multiplier section reduction (issue #291; canonical " "test_1062_constrained_solcx also segfaults at np=2, plain GAMG). " "custom-P on the constrained velocity block works in SERIAL " "(see test_1017_custom_mg_stokes.test_custom_fmg_stokes_constrained); " - "this auto-enables once the constrained solver is parallel-ready.") + "this auto-enables once #291 is fixed.") @pytest.mark.mpi(min_size=2) def test_parallel_custom_fmg_stokes_constrained(): """Stokes_Constrained (free-slip multipliers, grouped [p,h] split) custom-P on From 6d6d85ebdccbeba0a0839ad2ea2a45091b97e1e9 Mon Sep 17 00:00:00 2001 From: lmoresi Date: Mon, 29 Jun 2026 21:09:42 +1000 Subject: [PATCH 15/15] fix(custom_mg): wire assembled Jacobian into KSP before velocity-block fieldsplit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Stokes velocity-block injection forces computeJacobian, then reaches the fieldsplit BEFORE the SNES solve. SNESSolve normally wires the freshly-assembled Jacobian into the KSP/outer-PC; doing it ourselves was missing, so for some configurations (e.g. Stokes on an SBR-refined child mesh with a mesh-variable bodyforce) the outer PC carried an UNASSEMBLED operator and PCSetUp failed with "Matrix must be set first" (err73). Fix: snes.getKSP().setOperators(J, Pmat) after forcing assembly, before outer_pc.setUp(). Found via the Layer-2 adapt-on-top prototype (Stokes on a fault-refined child). Diagnosed: ksp.A set but asm=False while J.assembled=True -> operator not wired. New regression test_custom_fmg_stokes_on_sbr_child (Stokes velocity-block custom-P on an SBR child with a mesh-variable bodyforce) — fails pre-fix, passes after. test_1014/1015/1016/1017 green (serial); parallel np2 unchanged. Underworld development team with AI support from Claude Code --- src/underworld3/utilities/custom_mg.py | 6 +++++ tests/test_1017_custom_mg_stokes.py | 34 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/underworld3/utilities/custom_mg.py b/src/underworld3/utilities/custom_mg.py index f8863940..32023429 100644 --- a/src/underworld3/utilities/custom_mg.py +++ b/src/underworld3/utilities/custom_mg.py @@ -471,6 +471,12 @@ def _install_velocity_block_transfers(solver, Ps, verbose=False): snes.setFromOptions() solver.dm.restoreGlobalVec(x0) + # Wire the freshly-assembled Jacobian into the KSP/outer-PC. SNESSolve does + # this lazily, but we reach the fieldsplit BEFORE the solve — without it the + # outer PC can carry an unassembled operator and PCSetUp fails with + # "Matrix must be set first" (err73) for some configurations. + snes.getKSP().setOperators(J, Pmat) + # 2. split -> reach the velocity sub-KSP / sub-PC (field 0) outer_pc = snes.getKSP().getPC() outer_pc.setUp() diff --git a/tests/test_1017_custom_mg_stokes.py b/tests/test_1017_custom_mg_stokes.py index 5046d71d..bab16eb0 100644 --- a/tests/test_1017_custom_mg_stokes.py +++ b/tests/test_1017_custom_mg_stokes.py @@ -156,3 +156,37 @@ def test_custom_fmg_stokes_constrained(): assert vksp.getPC().getMGLevels() == len(coarse) + 1 assert vksp.getIterationNumber() <= 15 assert sol.velocity_error(s.u) < 5.0e-3 + + +def test_custom_fmg_stokes_on_sbr_child(): + """Stokes velocity-block custom-P on an SBR-refined CHILD mesh whose bodyforce + depends on a mesh variable (the Layer-2 adapt-on-top scenario). Regression for + the operator-not-wired bug: forcing the Jacobian before the fieldsplit left the + KSP carrying an unassembled operator -> PCSetUp 'Matrix must be set first' + (err73). Fixed by wiring the assembled Jacobian into the KSP before reaching + the velocity sub-PC.""" + import sympy + base = uw.meshing.UnstructuredSimplexBox( + minCoords=(0, 0), maxCoords=(1, 1), cellSize=0.25, regular=True, + refinement=2, qdegree=3) + child = _wrap(custom_mg.sbr_refine_where( + base.dm_hierarchy[-1], lambda c: abs(c[0] - 0.5) < 0.15), base) + coarse = [_wrap(d, base) for d in base.dm_hierarchy] # static base levels + + Tp = uw.discretisation.MeshVariable("Tp", child, 1, degree=2) # proxy field + Tp.data[:, 0] = 1.0 + s = uw.systems.Stokes(child) + s.constitutive_model = uw.constitutive_models.ViscousFlowModel + s.constitutive_model.Parameters.shear_viscosity_0 = 1.0 + s.saddle_preconditioner = 1.0 + s.bodyforce = sympy.Matrix([[0.0], [1.0e4 * Tp.sym[0]]]) # mesh-variable bodyforce + s.add_dirichlet_bc((0.0, 0.0), "Bottom"); s.add_dirichlet_bc((0.0, 0.0), "Top") + s.add_dirichlet_bc((0.0, None), "Left"); s.add_dirichlet_bc((0.0, None), "Right") + s.petsc_use_pressure_nullspace = True + s.petsc_options["snes_type"] = "ksponly" + custom_mg.set_custom_fmg(s, coarse, builder="barycentric", field_id=0) + s.solve() + vksp = _vel_ksp(s) + assert s.snes.getConvergedReason() > 0 + assert vksp.getPC().getType() == "mg" + assert vksp.getPC().getMGLevels() == len(coarse) + 1