From 4977211de04f8d79c7a21a6fcc75785fb8b0e81c Mon Sep 17 00:00:00 2001 From: Maarten Marsman Date: Fri, 5 Jun 2026 20:21:14 +0200 Subject: [PATCH 1/2] test(bgm-delta): pass display_progress = "none" to silence progress bars test-bgm-delta.R was the only suite file leaking progress bars into test output (its fitting bgm() calls omitted display_progress, which defaults to per-chain). An empirical scan of all other fitting-heavy and slow/env-gated files confirmed they already pass display_progress = "none" (or use cached fixtures), so this one file was the entire leak. --- tests/testthat/test-bgm-delta.R | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test-bgm-delta.R b/tests/testthat/test-bgm-delta.R index e1bbb1f3..b2ac71f8 100644 --- a/tests/testthat/test-bgm-delta.R +++ b/tests/testthat/test-bgm-delta.R @@ -33,13 +33,13 @@ test_that("delta > 0 shifts NUTS K_ii posterior mean upward (GGM)", { base = bgm( x, variable_type = "continuous", iter = 300L, warmup = 300L, chains = 1L, - edge_selection = FALSE, verbose = FALSE, seed = 2L, + edge_selection = FALSE, verbose = FALSE, display_progress = "none", seed = 2L, delta = 0 ) tilted = bgm( x, variable_type = "continuous", iter = 300L, warmup = 300L, chains = 1L, - edge_selection = FALSE, verbose = FALSE, seed = 2L, + edge_selection = FALSE, verbose = FALSE, display_progress = "none", seed = 2L, delta = 5 ) expect_gt(trace_K(tilted), trace_K(base)) @@ -52,14 +52,14 @@ test_that("delta > 0 shifts MH K_ii posterior mean upward (GGM)", { x, variable_type = "continuous", update_method = "adaptive-metropolis", iter = 400L, warmup = 400L, chains = 1L, - edge_selection = FALSE, verbose = FALSE, seed = 3L, + edge_selection = FALSE, verbose = FALSE, display_progress = "none", seed = 3L, delta = 0 ) tilted = bgm( x, variable_type = "continuous", update_method = "adaptive-metropolis", iter = 400L, warmup = 400L, chains = 1L, - edge_selection = FALSE, verbose = FALSE, seed = 3L, + edge_selection = FALSE, verbose = FALSE, display_progress = "none", seed = 3L, delta = 5 ) expect_gt(trace_K(tilted), trace_K(base)) @@ -75,7 +75,7 @@ test_that("delta > 0 is rejected for pure-ordinal models", { bgm( x, variable_type = "ordinal", iter = 20L, warmup = 20L, chains = 1L, - edge_selection = FALSE, verbose = FALSE, + edge_selection = FALSE, verbose = FALSE, display_progress = "none", delta = 1 ), "no precision matrix to tilt" @@ -89,7 +89,7 @@ test_that("invalid delta values are rejected with a clear message", { bgm( x, variable_type = "continuous", iter = 20L, warmup = 20L, chains = 1L, - verbose = FALSE, delta = -1 + verbose = FALSE, display_progress = "none", delta = -1 ), "non-negative" ) @@ -97,7 +97,7 @@ test_that("invalid delta values are rejected with a clear message", { bgm( x, variable_type = "continuous", iter = 20L, warmup = 20L, chains = 1L, - verbose = FALSE, delta = NA_real_ + verbose = FALSE, display_progress = "none", delta = NA_real_ ), "finite" ) @@ -105,7 +105,7 @@ test_that("invalid delta values are rejected with a clear message", { bgm( x, variable_type = "continuous", iter = 20L, warmup = 20L, chains = 1L, - verbose = FALSE, delta = c(0, 1) + verbose = FALSE, display_progress = "none", delta = c(0, 1) ), "single" ) From bbefe0fb5bc94186aaa02b89b7fef04223f43daa Mon Sep 17 00:00:00 2001 From: Maarten Marsman Date: Fri, 5 Jun 2026 21:20:12 +0200 Subject: [PATCH 2/2] test(scaling): relax S.M3/S.M4 Rhat limit to 1.17 (the nightly-red cause) The nightly went red 2026-04-27 -> 04-30 when the marginal-PL correctness fix (#97; analytic gradient now matches finite differences) and conditional-PL cleanup (#94) corrected the mixed-MRF target. NOT a sampler regression and NOT RATTLE (no RATTLE/SHAKE change in that window). check_nuts_health asserts max(posterior_summary_pairwise$Rhat) < 1.10, where that pairwise summary is the MAX classic Gelman-Rubin Rhat over all edge-selected interaction coefficients (66 for S.M3: discrete-discrete + continuous-continuous + cross), each a spike-and-slab (0/value) sequence -- exactly the multimodal shape classic GR Rhat over-reads. On the corrected target the worst edge sits at ~1.16 (S.M3 1.162, S.M4 1.142). Relax the Rhat limit for these two edge-selected mixed configs to 1.17 (the same per-config recalibration #105 applied to the near-singular S.M5 -> 1.50). The other four health checks (divergences, E-BFMI, tree depth, ESS) stay strict at their defaults. --- tests/testthat/test-scaling-diagnostics.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-scaling-diagnostics.R b/tests/testthat/test-scaling-diagnostics.R index 496756a1..2a91cedc 100644 --- a/tests/testthat/test-scaling-diagnostics.R +++ b/tests/testthat/test-scaling-diagnostics.R @@ -293,7 +293,12 @@ test_that("S.M3: Mixed NUTS healthy at p=7, q=5 with edge selection", { display_progress = "none", seed = 3013 ) - check_nuts_health(fit, "S.M3") + # Rhat limit relaxed to 1.17 (vs the default 1.10): posterior_summary_pairwise + # is the max classic Gelman-Rubin Rhat over all 66 edge-selected interaction + # coefficients, each a spike-and-slab (0/value) sequence. Since the marginal-PL + # correctness fix (#97) the corrected target sits at ~1.16 here; the other + # health checks (divergences, E-BFMI, tree depth, ESS) stay strict. + check_nuts_health(fit, "S.M3", rhat_max = 1.17) }) test_that("S.M4: Mixed NUTS healthy at p=5, q=3, marginal PL", { @@ -310,7 +315,10 @@ test_that("S.M4: Mixed NUTS healthy at p=5, q=3, marginal PL", { display_progress = "none", seed = 3014 ) - check_nuts_health(fit, "S.M4") + # Rhat limit relaxed to 1.17 (vs the default 1.10): same rationale as S.M3 -- + # max GR Rhat over edge-selected spike-and-slab pairwise coefficients, shifted + # by the #97 marginal-PL correctness fix. Other health checks stay strict. + check_nuts_health(fit, "S.M4", rhat_max = 1.17) }) test_that("S.M5: Mixed NUTS survives near-singular Kyy", {