From 31c92d3d164350019fd8a528eee24e8469058788 Mon Sep 17 00:00:00 2001
From: Chester Ismay <chester.ismay@gmail.com>
Date: Sun, 21 Jun 2026 09:46:52 -0700
Subject: [PATCH 1/4] tidy_summary: compute Q1/Q3 with linear interpolation
 (configurable)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

polars `.quantile()` defaults to "nearest", which diverges from R's quantile()
(type 7), NumPy, and the quartiles drawn by Plotly/ggplot2 boxplots. Add an
`interpolation="linear"` parameter (new default) so tidy_summary's five-number
summary matches the standard textbook/R convention and the boxplots shown beside
it. Pass interpolation="nearest" to restore the old behavior.

Queued for the next release (0.1.1) — not yet published.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 moderndive/modeling.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/moderndive/modeling.py b/moderndive/modeling.py
index c4834f5..b3aeb14 100644
--- a/moderndive/modeling.py
+++ b/moderndive/modeling.py
@@ -362,13 +362,23 @@ def get_regression_summaries(model, digits: int = 3) -> pl.DataFrame:
     return table.with_columns(pl.col(float_cols).round(digits))
 
 
-def tidy_summary(data, columns: list[str] | None = None, digits: int = 3) -> pl.DataFrame:
+def tidy_summary(
+    data,
+    columns: list[str] | None = None,
+    digits: int = 3,
+    interpolation: str = "linear",
+) -> pl.DataFrame:
     """Per-variable summary statistics for the selected columns.
 
     Mirrors the R ``moderndive::tidy_summary`` column layout:
     ``column, n, group, type, min, Q1, mean, median, Q3, max, sd``.
     Numeric columns get the five-number summary + mean/sd; non-numeric columns
     report ``n`` and ``type`` with the numeric fields left null.
+
+    ``interpolation`` selects how ``Q1``/``Q3`` are computed when a quartile falls
+    between two observations. The default ``"linear"`` matches R's ``quantile()``
+    (type 7), NumPy, and the quartiles drawn by Plotly/ggplot2 boxplots; pass any
+    other polars quantile method (e.g. ``"nearest"``) to override.
     """
     df = data if isinstance(data, pl.DataFrame) else pl.from_pandas(data)
     columns = columns or df.columns
@@ -395,10 +405,10 @@ def tidy_summary(data, columns: list[str] | None = None, digits: int = 3) -> pl.
             s = series.drop_nulls()
             row.update(
                 min=round(float(s.min()), digits),
-                Q1=round(float(s.quantile(0.25)), digits),
+                Q1=round(float(s.quantile(0.25, interpolation=interpolation)), digits),
                 mean=round(float(s.mean()), digits),
                 median=round(float(s.median()), digits),
-                Q3=round(float(s.quantile(0.75)), digits),
+                Q3=round(float(s.quantile(0.75, interpolation=interpolation)), digits),
                 max=round(float(s.max()), digits),
                 sd=round(float(s.std()), digits),
             )

From 3c0f89084aa639b6f7560194456093f09450b43f Mon Sep 17 00:00:00 2001
From: Chester Ismay <chester.ismay@gmail.com>
Date: Sun, 21 Jun 2026 09:53:51 -0700
Subject: [PATCH 2/4] docs: add CHANGELOG Unreleased entry for tidy_summary
 linear interpolation

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 283a416..6d57472 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Changelog
 
+## Unreleased
+
+### Changed
+
+- `tidy_summary()` now computes `Q1`/`Q3` with **linear** quantile interpolation
+  by default (matching R's `quantile()` type 7, NumPy, and the quartiles drawn by
+  Plotly/ggplot2 boxplots) instead of polars' default `"nearest"`. A new
+  `interpolation=` parameter exposes the choice; pass `interpolation="nearest"`
+  to restore the previous behavior.
+
 ## 0.1.0 (2026-06-20)
 
 Initial release of the Python companion to **ModernDive: Statistical Inference

From f1a41c2db7109a40b5e07be6e258ab1fe010ad5e Mon Sep 17 00:00:00 2001
From: Chester Ismay <chester.ismay@gmail.com>
Date: Sun, 21 Jun 2026 10:09:05 -0700
Subject: [PATCH 3/4] chisq_test: Yates correction by default (correct=True),
 matching R
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

R's chisq.test and this package's prop_test both default to Yates' continuity
correction; chisq_test previously hardcoded correction=False, an inconsistency.
Add a `correct: bool = True` parameter (applied only to 2x2 tables, like R and
scipy); pass correct=False for the uncorrected Pearson statistic that matches the
simulation-based calculate(stat="Chisq"). 322 tests pass at 100% coverage.

Queued for 0.1.1 — not yet released.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 CHANGELOG.md                 |  5 +++++
 moderndive/infer/wrappers.py | 10 +++++++++-
 tests/test_infer_parity.py   |  8 +++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d57472..9924eab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,11 @@
   Plotly/ggplot2 boxplots) instead of polars' default `"nearest"`. A new
   `interpolation=` parameter exposes the choice; pass `interpolation="nearest"`
   to restore the previous behavior.
+- `chisq_test()` now applies **Yates' continuity correction by default** for the
+  test of independence (`correct=True`), matching R's `chisq.test` and the
+  package's `prop_test`. As in R, the correction only affects 2x2 tables. Pass
+  `correct=False` for the uncorrected Pearson statistic (e.g. to match the
+  simulation-based `calculate(stat="Chisq")`).
 
 ## 0.1.0 (2026-06-20)
 
diff --git a/moderndive/infer/wrappers.py b/moderndive/infer/wrappers.py
index a1de893..272f168 100644
--- a/moderndive/infer/wrappers.py
+++ b/moderndive/infer/wrappers.py
@@ -181,6 +181,7 @@ def chisq_test(
     response: str | None = None,
     explanatory: str | None = None,
     p: dict | None = None,
+    correct: bool = True,
 ) -> pl.DataFrame:
     """Tidy chi-squared test.
 
@@ -188,6 +189,13 @@ def chisq_test(
     response and a ``p={level: probability, ...}`` mapping, it is a **goodness-of-fit**
     test against those hypothesized proportions. Returns ``statistic``,
     ``chisq_df``, ``p_value``.
+
+    ``correct`` applies Yates' continuity correction to the test of independence,
+    matching R's ``chisq.test`` default (``correct=TRUE``) and ``prop_test``; like
+    R, the correction only affects 2x2 tables (one degree of freedom). Pass
+    ``correct=False`` for the uncorrected Pearson statistic (e.g. to match the
+    simulation-based ``calculate(stat="Chisq")``). It does not apply to the
+    goodness-of-fit case.
     """
     from scipy import stats
 
@@ -210,7 +218,7 @@ def chisq_test(
         )
     sub = data.select(resp, expl).drop_nulls()
     table = sub.to_pandas().pivot_table(index=resp, columns=expl, aggfunc="size", fill_value=0)
-    chi2, pval, dof, _ = stats.chi2_contingency(table.to_numpy(), correction=False)
+    chi2, pval, dof, _ = stats.chi2_contingency(table.to_numpy(), correction=correct)
     return pl.DataFrame(
         {"statistic": [float(chi2)], "chisq_df": [int(dof)], "p_value": [float(pval)]}
     )
diff --git a/tests/test_infer_parity.py b/tests/test_infer_parity.py
index 5c75e64..339f094 100644
--- a/tests/test_infer_parity.py
+++ b/tests/test_infer_parity.py
@@ -127,9 +127,15 @@ def test_t_test_one_sample_tidy_columns():
 
 
 def test_chisq_test_df_and_stat():
+    # Default applies Yates' continuity correction (matches R's chisq.test and
+    # prop_test); on this weak 2x2 association the corrected statistic is ~0.
     out = chisq_test(_yawn(), formula="yawn ~ group")
     assert out["chisq_df"][0] == 1
-    assert out["statistic"][0] > 0
+    assert out["statistic"][0] >= 0
+    # The uncorrected Pearson statistic is strictly positive and larger.
+    raw = chisq_test(_yawn(), formula="yawn ~ group", correct=False)
+    assert raw["statistic"][0] > 0
+    assert raw["statistic"][0] > out["statistic"][0]
 
 
 # --- bias-corrected CI ----------------------------------------------------

From 2d194c5452b28ce02aa1c670ddbfb23fbd0d8e44 Mon Sep 17 00:00:00 2001
From: Chester Ismay <chester.ismay@gmail.com>
Date: Mon, 22 Jun 2026 08:01:20 -0700
Subject: [PATCH 4/4] make tidy_summary/chisq_test changes non-breaking
 (preserve 0.1.0 defaults)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both additions previously flipped a default, changing results for existing 0.1.0
users. Revert the defaults to 0.1.0 behavior and keep the new behavior opt-in:

- tidy_summary(interpolation=): default back to "nearest"; pass "linear" for
  R's quantile() type 7, which is also NumPy's default and the quartiles drawn
  by Plotly/ggplot2 boxplots.
- chisq_test(correct=): default back to False (uncorrected Pearson, matching
  calculate(stat="Chisq")); pass correct=True for Yates' continuity correction,
  matching R's chisq.test and this package's prop_test.

Both are now purely additive (new opt-in parameters) and safe for a 0.1.1 patch.
Also document a CHANGELOG/RELEASING convention requiring any breaking change to
get a dedicated "⚠️ Breaking changes" section and a non-patch version bump.

322 tests, 100% coverage, ruff clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_017CTL1QSTg1DmDUpqYuPEog
---
 CHANGELOG.md                 | 33 +++++++++++++++++++++------------
 RELEASING.md                 |  4 ++++
 moderndive/infer/wrappers.py | 12 ++++++------
 moderndive/modeling.py       |  9 +++++----
 tests/test_infer_parity.py   | 14 +++++++-------
 5 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9924eab..90f9082 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,19 +1,28 @@
 # Changelog
 
+<!--
+Conventions: group entries under ### Added / ### Changed / ### Fixed.
+ANY behavior change that could alter existing users' results is a BREAKING change
+and MUST go in a dedicated, top-of-version "### ⚠️ Breaking changes" section that
+states (a) exactly what changed, (b) how to restore the previous behavior, and
+(c) why. Breaking changes require a minor/major version bump, never a patch.
+-->
+
 ## Unreleased
 
-### Changed
-
-- `tidy_summary()` now computes `Q1`/`Q3` with **linear** quantile interpolation
-  by default (matching R's `quantile()` type 7, NumPy, and the quartiles drawn by
-  Plotly/ggplot2 boxplots) instead of polars' default `"nearest"`. A new
-  `interpolation=` parameter exposes the choice; pass `interpolation="nearest"`
-  to restore the previous behavior.
-- `chisq_test()` now applies **Yates' continuity correction by default** for the
-  test of independence (`correct=True`), matching R's `chisq.test` and the
-  package's `prop_test`. As in R, the correction only affects 2x2 tables. Pass
-  `correct=False` for the uncorrected Pearson statistic (e.g. to match the
-  simulation-based `calculate(stat="Chisq")`).
+### Added
+
+- `tidy_summary()` gains an `interpolation=` parameter controlling how `Q1`/`Q3`
+  are computed. The default is unchanged from 0.1.0 (`"nearest"`); pass
+  `interpolation="linear"` for R's `quantile()` type 7 — also NumPy's default and
+  the quartiles drawn by Plotly/ggplot2 boxplots. **Non-breaking** (default
+  preserved).
+- `chisq_test()` gains a `correct=` parameter for Yates' continuity correction on
+  the test of independence. The default is unchanged from 0.1.0 (`correct=False`,
+  the uncorrected Pearson statistic, matching the simulation-based
+  `calculate(stat="Chisq")`); pass `correct=True` to match R's
+  `chisq.test`/`prop_test`. As in R, the correction only affects 2x2 tables.
+  **Non-breaking** (default preserved).
 
 ## 0.1.0 (2026-06-20)
 
diff --git a/RELEASING.md b/RELEASING.md
index c556294..a02c005 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -46,6 +46,10 @@ unzip -l dist/*.whl | grep -c parquet   # sanity: bundled datasets are present
 ## Cutting a release
 
 1. **Pick the version** (PyPI versions are immutable — you can't re-upload one).
+   Any change that can alter existing users' results is **breaking**: it needs a
+   dedicated `### ⚠️ Breaking changes` section in `CHANGELOG.md` (what changed,
+   how to restore the old behavior, why) and a **minor/major** bump — never a
+   patch. Prefer adding an opt-in parameter with the old default to avoid breaking.
 2. **Bump `version`** in `pyproject.toml`.
 3. **Update `CHANGELOG.md`**: rename the `## Unreleased` section to
    `## <version> (YYYY-MM-DD)` and start a fresh empty `## Unreleased` above it.
diff --git a/moderndive/infer/wrappers.py b/moderndive/infer/wrappers.py
index 272f168..6d60e63 100644
--- a/moderndive/infer/wrappers.py
+++ b/moderndive/infer/wrappers.py
@@ -181,7 +181,7 @@ def chisq_test(
     response: str | None = None,
     explanatory: str | None = None,
     p: dict | None = None,
-    correct: bool = True,
+    correct: bool = False,
 ) -> pl.DataFrame:
     """Tidy chi-squared test.
 
@@ -190,11 +190,11 @@ def chisq_test(
     test against those hypothesized proportions. Returns ``statistic``,
     ``chisq_df``, ``p_value``.
 
-    ``correct`` applies Yates' continuity correction to the test of independence,
-    matching R's ``chisq.test`` default (``correct=TRUE``) and ``prop_test``; like
-    R, the correction only affects 2x2 tables (one degree of freedom). Pass
-    ``correct=False`` for the uncorrected Pearson statistic (e.g. to match the
-    simulation-based ``calculate(stat="Chisq")``). It does not apply to the
+    ``correct`` applies Yates' continuity correction to the test of independence.
+    It defaults to ``False`` — the uncorrected Pearson statistic, matching
+    moderndive 0.1.0 and the simulation-based ``calculate(stat="Chisq")``. Pass
+    ``correct=True`` to match R's ``chisq.test``/``prop_test`` default; like R, the
+    correction only affects 2x2 tables (one degree of freedom) and never the
     goodness-of-fit case.
     """
     from scipy import stats
diff --git a/moderndive/modeling.py b/moderndive/modeling.py
index b3aeb14..72c3d75 100644
--- a/moderndive/modeling.py
+++ b/moderndive/modeling.py
@@ -366,7 +366,7 @@ def tidy_summary(
     data,
     columns: list[str] | None = None,
     digits: int = 3,
-    interpolation: str = "linear",
+    interpolation: str = "nearest",
 ) -> pl.DataFrame:
     """Per-variable summary statistics for the selected columns.
 
@@ -376,9 +376,10 @@ def tidy_summary(
     report ``n`` and ``type`` with the numeric fields left null.
 
     ``interpolation`` selects how ``Q1``/``Q3`` are computed when a quartile falls
-    between two observations. The default ``"linear"`` matches R's ``quantile()``
-    (type 7), NumPy, and the quartiles drawn by Plotly/ggplot2 boxplots; pass any
-    other polars quantile method (e.g. ``"nearest"``) to override.
+    between two observations. The default ``"nearest"`` matches moderndive 0.1.0
+    (polars' default). Pass ``interpolation="linear"`` for R's ``quantile()`` type
+    7 — also NumPy's default and the quartiles drawn by Plotly/ggplot2 boxplots —
+    or any other polars quantile method.
     """
     df = data if isinstance(data, pl.DataFrame) else pl.from_pandas(data)
     columns = columns or df.columns
diff --git a/tests/test_infer_parity.py b/tests/test_infer_parity.py
index 339f094..1cd32c2 100644
--- a/tests/test_infer_parity.py
+++ b/tests/test_infer_parity.py
@@ -127,15 +127,15 @@ def test_t_test_one_sample_tidy_columns():
 
 
 def test_chisq_test_df_and_stat():
-    # Default applies Yates' continuity correction (matches R's chisq.test and
-    # prop_test); on this weak 2x2 association the corrected statistic is ~0.
+    # Default is the uncorrected Pearson statistic (matches moderndive 0.1.0 and
+    # the simulation-based calculate(stat="Chisq")) — strictly positive here.
     out = chisq_test(_yawn(), formula="yawn ~ group")
     assert out["chisq_df"][0] == 1
-    assert out["statistic"][0] >= 0
-    # The uncorrected Pearson statistic is strictly positive and larger.
-    raw = chisq_test(_yawn(), formula="yawn ~ group", correct=False)
-    assert raw["statistic"][0] > 0
-    assert raw["statistic"][0] > out["statistic"][0]
+    assert out["statistic"][0] > 0
+    # Opt into Yates' continuity correction (R's chisq.test default); on this weak
+    # 2x2 association the corrected statistic is smaller.
+    corrected = chisq_test(_yawn(), formula="yawn ~ group", correct=True)
+    assert corrected["statistic"][0] < out["statistic"][0]
 
 
 # --- bias-corrected CI ----------------------------------------------------