moderndive · ismayc · Jun 22, 2026 · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,29 @@
 # Changelog
 
+<!--
+Conventions: group entries under ### Added / ### Changed / ### Fixed.
+ANY behavior change that could alter existing users' results is a BREAKING change
+and MUST go in a dedicated, top-of-version "### ⚠️ Breaking changes" section that
+states (a) exactly what changed, (b) how to restore the previous behavior, and
+(c) why. Breaking changes require a minor/major version bump, never a patch.
+-->
+
+## Unreleased
+
+### Added
+
+- `tidy_summary()` gains an `interpolation=` parameter controlling how `Q1`/`Q3`
+  are computed. The default is unchanged from 0.1.0 (`"nearest"`); pass
+  `interpolation="linear"` for R's `quantile()` type 7 — also NumPy's default and
+  ggplot2's boxplot hinges (Plotly's "linear" quartile rule differs slightly).
+  **Non-breaking** (default preserved).
+- `chisq_test()` gains a `correct=` parameter for Yates' continuity correction on
+  the test of independence. The default is unchanged from 0.1.0 (`correct=False`,
+  the uncorrected Pearson statistic, matching the simulation-based
+  `calculate(stat="Chisq")`); pass `correct=True` to match R's
+  `chisq.test`/`prop_test`. As in R, the correction only affects 2x2 tables.
+  **Non-breaking** (default preserved).
+
 ## 0.1.0 (2026-06-20)
 
 Initial release of the Python companion to **ModernDive: Statistical Inference

diff --git a/RELEASING.md b/RELEASING.md
@@ -46,6 +46,10 @@ unzip -l dist/*.whl | grep -c parquet   # sanity: bundled datasets are present
 ## Cutting a release
 
 1. **Pick the version** (PyPI versions are immutable — you can't re-upload one).
+   Any change that can alter existing users' results is **breaking**: it needs a
+   dedicated `### ⚠️ Breaking changes` section in `CHANGELOG.md` (what changed,
+   how to restore the old behavior, why) and a **minor/major** bump — never a
+   patch. Prefer adding an opt-in parameter with the old default to avoid breaking.
 2. **Bump `version`** in `pyproject.toml`.
 3. **Update `CHANGELOG.md`**: rename the `## Unreleased` section to
    `## <version> (YYYY-MM-DD)` and start a fresh empty `## Unreleased` above it.

diff --git a/moderndive/infer/wrappers.py b/moderndive/infer/wrappers.py
@@ -181,13 +181,21 @@ def chisq_test(
     response: str | None = None,
     explanatory: str | None = None,
     p: dict | None = None,
+    correct: bool = False,
 ) -> pl.DataFrame:
     """Tidy chi-squared test.
 
     With an explanatory variable, this is a **test of independence**. With only a
     response and a ``p={level: probability, ...}`` mapping, it is a **goodness-of-fit**
     test against those hypothesized proportions. Returns ``statistic``,
     ``chisq_df``, ``p_value``.
+
+    ``correct`` applies Yates' continuity correction to the test of independence.
+    It defaults to ``False`` — the uncorrected Pearson statistic, matching
+    moderndive 0.1.0 and the simulation-based ``calculate(stat="Chisq")``. Pass
+    ``correct=True`` to match R's ``chisq.test``/``prop_test`` default; like R, the
+    correction only affects 2x2 tables (one degree of freedom) and never the
+    goodness-of-fit case.
     """
     from scipy import stats
 
@@ -210,7 +218,7 @@ def chisq_test(
         )
     sub = data.select(resp, expl).drop_nulls()
     table = sub.to_pandas().pivot_table(index=resp, columns=expl, aggfunc="size", fill_value=0)
-    chi2, pval, dof, _ = stats.chi2_contingency(table.to_numpy(), correction=False)
+    chi2, pval, dof, _ = stats.chi2_contingency(table.to_numpy(), correction=correct)
     return pl.DataFrame(
         {"statistic": [float(chi2)], "chisq_df": [int(dof)], "p_value": [float(pval)]}
     )

diff --git a/moderndive/modeling.py b/moderndive/modeling.py
@@ -362,13 +362,24 @@ def get_regression_summaries(model, digits: int = 3) -> pl.DataFrame:
     return table.with_columns(pl.col(float_cols).round(digits))
 
 
-def tidy_summary(data, columns: list[str] | None = None, digits: int = 3) -> pl.DataFrame:
+def tidy_summary(
+    data,
+    columns: list[str] | None = None,
+    digits: int = 3,
+    interpolation: str = "nearest",
+) -> pl.DataFrame:
     """Per-variable summary statistics for the selected columns.
 
     Mirrors the R ``moderndive::tidy_summary`` column layout:
     ``column, n, group, type, min, Q1, mean, median, Q3, max, sd``.
     Numeric columns get the five-number summary + mean/sd; non-numeric columns
     report ``n`` and ``type`` with the numeric fields left null.
+
+    ``interpolation`` selects how ``Q1``/``Q3`` are computed when a quartile falls
+    between two observations. The default ``"nearest"`` matches moderndive 0.1.0
+    (polars' default). Pass ``interpolation="linear"`` for R's ``quantile()`` type
+    7 — also NumPy's default and ggplot2's boxplot hinges (Plotly's "linear" rule
+    differs slightly) — or any other polars quantile method.
     """
     df = data if isinstance(data, pl.DataFrame) else pl.from_pandas(data)
     columns = columns or df.columns
@@ -395,10 +406,10 @@ def tidy_summary(data, columns: list[str] | None = None, digits: int = 3) -> pl.
             s = series.drop_nulls()
             row.update(
                 min=round(float(s.min()), digits),
-                Q1=round(float(s.quantile(0.25)), digits),
+                Q1=round(float(s.quantile(0.25, interpolation=interpolation)), digits),
                 mean=round(float(s.mean()), digits),
                 median=round(float(s.median()), digits),
-                Q3=round(float(s.quantile(0.75)), digits),
+                Q3=round(float(s.quantile(0.75, interpolation=interpolation)), digits),
                 max=round(float(s.max()), digits),
                 sd=round(float(s.std()), digits),
             )

diff --git a/tests/test_infer_parity.py b/tests/test_infer_parity.py
@@ -127,9 +127,15 @@ def test_t_test_one_sample_tidy_columns():
 
 
 def test_chisq_test_df_and_stat():
+    # Default is the uncorrected Pearson statistic (matches moderndive 0.1.0 and
+    # the simulation-based calculate(stat="Chisq")) — strictly positive here.
     out = chisq_test(_yawn(), formula="yawn ~ group")
     assert out["chisq_df"][0] == 1
     assert out["statistic"][0] > 0
+    # Opt into Yates' continuity correction (R's chisq.test default); it pulls each
+    # observed count toward its expected value, so the 2x2 statistic shrinks.
+    corrected = chisq_test(_yawn(), formula="yawn ~ group", correct=True)
+    assert corrected["statistic"][0] < out["statistic"][0]
 
 
 # --- bias-corrected CI ----------------------------------------------------