From b22247779fba8346731f4005d9663ccf0f1d874e Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 14:58:54 -0400
Subject: [PATCH 01/55] Add col_pct_missing()

---
 pointblank/validate.py | 172 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index d58e33caa..d0b78c277 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -10404,6 +10404,178 @@ def col_pct_null(
 
         return self
 
+    def col_pct_missing(
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        max_pct: float,
+        reason: str | None = None,
+        category: str | None = None,
+        thresholds: int | float | None | bool | tuple | dict | Thresholds = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate:
+        """
+        Validate that the percentage of *structured* missing values stays within a limit.
+
+        The `col_pct_missing()` validation method checks whether the percentage of missing values
+        in a column is at most `max_pct=`. Unlike [`col_pct_null()`](`pointblank.Validate.col_pct_null`),
+        which only considers actual null values, this method uses a
+        [`MissingSpec`](`pointblank.MissingSpec`) to define which values count as missing: declared
+        sentinel values (e.g., `-99` for `"refused"`) and, when `null_is_missing=True`, actual null
+        values. This validation operates at the column level, generating a single validation step
+        per column that passes when the missing percentage does not exceed `max_pct=`.
+
+        You can narrow the check to a single reason (via `reason=`) or a category of reasons (via
+        `category=`), making it possible to assert things like "at most 10% of values were refused"
+        or "at most 15% are item nonresponse".
+
+        Parameters
+        ----------
+        columns
+            A single column or a list of columns to validate. Can also use
+            [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
+            multiple columns are supplied or resolved, there will be a separate validation step
+            generated for each column.
+        missing
+            A [`MissingSpec`](`pointblank.MissingSpec`) describing the sentinel values (and their
+            reasons) that encode missingness for this column.
+        max_pct
+            The maximum allowable percentage of missing values, expressed as a decimal between
+            `0.0` and `1.0`. For example, `max_pct=0.20` means at most 20% of values may be missing.
+        reason
+            If provided, only count missing values whose reason matches this label. Cannot be
+            combined with `category=`.
+        category
+            If provided, only count missing values whose reason falls in this category (as defined
+            in `MissingSpec.categories`). Cannot be combined with `reason=`.
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedences of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`. The default is `None`, which means that no thresholds will
+            be set locally and global thresholds (if any) will take effect.
+        actions
+            Optional actions to take when the validation step(s) meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use the templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value or callable that determines whether the validation step should be
+            active. Using `False` will make the validation step inactive (still reporting its
+            presence and keeping indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer_timings=False, preview_incl_header=False)
+        ```
+        Survey data often encodes missingness with sentinel values rather than nulls. Here, the
+        `age` column uses `-99` (`"not_asked"`), `-98` (`"refused"`), and `-97` (`"dont_know"`):
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame(
+            {"age": [34, -98, 41, -99, 29, -98, 55, 38]},
+        )
+
+        age_missing = pb.MissingSpec(
+            reasons={-99: "not_asked", -98: "refused", -97: "dont_know"},
+            categories={"item_nonresponse": ["refused", "dont_know"]},
+        )
+
+        validation = (
+            pb.Validate(data=tbl)
+            .col_pct_missing(columns="age", missing=age_missing, max_pct=0.5)
+            .col_pct_missing(columns="age", missing=age_missing, reason="refused", max_pct=0.30)
+            .interrogate()
+        )
+
+        validation
+        ```
+        """
+        assertion_type = _get_fn_name()
+
+        _check_column(column=columns)
+        _check_thresholds(thresholds=thresholds)
+        _check_active_input(param=active, param_name="active")
+
+        if not isinstance(missing, MissingSpec):
+            raise TypeError(
+                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
+            )
+
+        if reason is not None and category is not None:
+            raise ValueError("Only one of `reason=` or `category=` can be specified.")
+
+        if not 0.0 <= max_pct <= 1.0:
+            raise ValueError(f"`max_pct=` must be between 0.0 and 1.0, got {max_pct}.")
+
+        # Resolve which sentinel values (and whether nulls) count as missing for this step
+        if reason is not None:
+            sentinels = missing.values_for_reason(reason)
+            count_null = missing.null_is_missing and missing.null_reason == reason
+        elif category is not None:
+            sentinels = missing.values_for_category(category)
+            cat_reasons = (missing.categories or {}).get(category, [])
+            count_null = missing.null_is_missing and missing.null_reason in cat_reasons
+        else:
+            sentinels = missing.sentinel_values()
+            count_null = missing.null_is_missing
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        # If `columns` is a ColumnSelector or Narwhals selector, call `col()` on it to later
+        # resolve the columns
+        if isinstance(columns, (ColumnSelector, nw.selectors.Selector)):
+            columns = col(columns)
+
+        # If `columns` is Column value or a string, place it in a list for iteration
+        if isinstance(columns, (Column, str)):
+            columns = [columns]
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        # Iterate over the columns and create a validation step for each
+        for column in columns:
+            val_info = _ValidationInfo(
+                assertion_type=assertion_type,
+                column=column,
+                values={
+                    "sentinels": sentinels,
+                    "count_null": count_null,
+                    "max_pct": max_pct,
+                    "reason": reason,
+                    "category": category,
+                },
+                thresholds=thresholds,
+                actions=actions,
+                brief=brief,
+                active=active,
+            )
+
+            self._add_validation(validation_info=val_info)
+
+        return self
     def rows_distinct(
         self,
         columns_subset: str | list[str] | None = None,

From 00c571f866ae392efee17cbbc02fb3b233cf1de4 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 14:59:12 -0400
Subject: [PATCH 02/55] Add col_missing_coded()

---
 pointblank/validate.py | 139 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index d0b78c277..ac4cdc28e 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -10576,6 +10576,145 @@ def col_pct_missing(
             self._add_validation(validation_info=val_info)
 
         return self
+
+    def col_missing_coded(
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate:
+        """
+        Validate that all missing values in a column are *coded* (no uncoded nulls).
+
+        The `col_missing_coded()` validation method checks that every absent value in a column is
+        expressed with an explicit missing-value code, rather than a raw null. Under the structured
+        missingness model (see [`MissingSpec`](`pointblank.MissingSpec`)), every absence should
+        carry a *reason* — encoded as a sentinel value such as `-99` for `"not_asked"`. A raw null
+        represents *uncoded* (unknown) missingness, so this validation treats raw nulls as failing
+        test units while declared sentinel values and real values pass.
+
+        This validation operates over the number of test units equal to the number of rows in the
+        table (determined after any `pre=` mutation has been applied).
+
+        Parameters
+        ----------
+        columns
+            A single column or a list of columns to validate. Can also use
+            [`col()`](`pointblank.col`) with column selectors to specify one or more columns. If
+            multiple columns are supplied or resolved, there will be a separate validation step
+            generated for each column.
+        missing
+            A [`MissingSpec`](`pointblank.MissingSpec`) describing the sentinel values (and their
+            reasons) that encode missingness for this column. The spec documents which codes are
+            considered valid expressions of missingness.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+        segments
+            An optional directive on segmentation, which serves to split a validation step into
+            multiple (one step per segment).
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedences of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`.
+        actions
+            Optional actions to take when the validation step(s) meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use the templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value or callable that determines whether the validation step should be
+            active. Using `False` will make the validation step inactive (still reporting its
+            presence and keeping indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer_timings=False, preview_incl_header=False)
+        ```
+        Here, the `age` column codes its missingness with sentinel values, except for one row that
+        has a raw null (an uncoded absence):
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame({"age": [34, -98, 41, None, 29, -99, 55, 38]})
+
+        age_missing = pb.MissingSpec(
+            reasons={-99: "not_asked", -98: "refused", -97: "dont_know"},
+        )
+
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns="age", missing=age_missing)
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        The validation reports a single failing test unit: the row where `age` is a raw null, which
+        represents missingness without a documented reason.
+        """
+        assertion_type = _get_fn_name()
+
+        _check_column(column=columns)
+        _check_pre(pre=pre)
+        _check_thresholds(thresholds=thresholds)
+        _check_active_input(param=active, param_name="active")
+
+        if not isinstance(missing, MissingSpec):
+            raise TypeError(
+                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
+            )
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        columns = _resolve_columns(columns)
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        # Iterate over the columns and create a validation step for each
+        for column in columns:
+            val_info = _ValidationInfo(
+                assertion_type=assertion_type,
+                column=column,
+                values=missing,
+                pre=pre,
+                segments=segments,
+                thresholds=thresholds,
+                actions=actions,
+                brief=brief,
+                active=active,
+            )
+
+            self._add_validation(validation_info=val_info)
+
+        return self
+
     def rows_distinct(
         self,
         columns_subset: str | list[str] | None = None,

From 17aaf5f4658c591371a6cd959b03d1e5eaf68a07 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 14:59:29 -0400
Subject: [PATCH 03/55] Add _create_text_col_pct_missing() and _create_text_col_missing_coded() util fns

---
 pointblank/validate.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index ac4cdc28e..4f64bbe3b 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -20004,6 +20004,40 @@ def _create_text_col_pct_null(
     return text
 
 
+def _create_text_col_pct_missing(
+    lang: str,
+    column: str | None,
+    value: dict,
+    for_failure: bool = False,
+    locale: str | None = None,
+) -> str:
+    """Create autobrief/failure text for col_pct_missing validation."""
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    column_text = _prep_column_text(column=column)
+
+    fmt_locale = locale if locale else lang
+
+    max_pct_value = value.get("max_pct", 0) * 100  # Convert to percentage
+    max_pct_formatted = _format_number_safe(max_pct_value, decimals=1, locale=fmt_locale)
+
+    return EXPECT_FAIL_TEXT[f"col_pct_missing_{type_}_text"][lang].format(
+        column_text=column_text,
+        max_pct=max_pct_formatted,
+    )
+
+
+def _create_text_col_missing_coded(lang: str, column: str | None, for_failure: bool = False) -> str:
+    """Create autobrief/failure text for col_missing_coded validation."""
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    column_text = _prep_column_text(column=column)
+
+    return EXPECT_FAIL_TEXT[f"col_missing_coded_{type_}_text"][lang].format(
+        column_text=column_text,
+    )
+
+
 def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)
 

From db9228729d4d045055026299ef567bd003f42141 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 14:59:41 -0400
Subject: [PATCH 04/55] Wire col_pct_missing() and col_missing_coded() into interrogation and reporting

---
 pointblank/validate.py | 49 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index 4f64bbe3b..f9cc73c7d 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -63,6 +63,7 @@
     SpeciallyValidation,
     col_count_match,
     col_exists,
+    col_pct_missing,
     col_pct_null,
     col_schema_match,
     col_vals_expr,
@@ -75,6 +76,7 @@
     interrogate_le,
     interrogate_lt,
     interrogate_ne,
+    interrogate_missing_coded,
     interrogate_not_null,
     interrogate_notin,
     interrogate_null,
@@ -85,6 +87,7 @@
     rows_complete,
 )
 from pointblank._typing import SegmentSpec
+from pointblank.missing import MissingSpec
 from pointblank._utils import (
     _check_any_df_lib,
     _check_invalid_fields,
@@ -14080,6 +14083,7 @@ def interrogate(
                         "col_vals_le",
                         "col_vals_null",
                         "col_vals_not_null",
+                        "col_missing_coded",
                         "col_vals_increasing",
                         "col_vals_decreasing",
                         "col_vals_between",
@@ -14122,6 +14126,8 @@ def interrogate(
                             results_tbl = interrogate_null(tbl=tbl, column=column)
                         elif assertion_method == "not_null":
                             results_tbl = interrogate_not_null(tbl=tbl, column=column)
+                        elif assertion_method == "missing_coded":
+                            results_tbl = interrogate_missing_coded(tbl=tbl, column=column)
 
                         elif assertion_type == "col_vals_increasing":
                             from pointblank._interrogation import interrogate_increasing
@@ -14208,6 +14214,22 @@ def interrogate(
 
                         results_tbl = None
 
+                    elif assertion_type == "col_pct_missing":
+                        result_bool = col_pct_missing(
+                            data_tbl=data_tbl_step,
+                            column=column,
+                            sentinels=value["sentinels"],
+                            count_null=value["count_null"],
+                            max_pct=value["max_pct"],
+                        )
+
+                        validation.all_passed = result_bool
+                        validation.n = 1
+                        validation.n_passed = int(result_bool)
+                        validation.n_failed = 1 - int(result_bool)
+
+                        results_tbl = None
+
                     elif assertion_type == "col_vals_expr":
                         results_tbl = col_vals_expr(
                             data_tbl=data_tbl_step, expr=value, tbl_type=tbl_type
@@ -17267,6 +17289,7 @@ def get_tabular_report(
             elif assertion_type[i] in [
                 "col_vals_null",
                 "col_vals_not_null",
+                "col_missing_coded",
                 "col_exists",
                 "rows_distinct",
                 "rows_complete",
@@ -17282,6 +17305,16 @@ def get_tabular_report(
                 tol_value = bound_finder.keywords.get("tol", 0) if bound_finder else 0
                 values_upd.append(f"p = {p_value}<br/>tol = {tol_value}")
 
+            elif assertion_type[i] in ["col_pct_missing"]:
+                # Format the max_pct and any reason/category filter for display
+                max_pct_value = value["max_pct"]
+                filter_line = ""
+                if value.get("reason") is not None:
+                    filter_line = f"<br/>reason = {value['reason']}"
+                elif value.get("category") is not None:
+                    filter_line = f"<br/>category = {value['category']}"
+                values_upd.append(f"max_pct = {max_pct_value}{filter_line}")
+
             elif assertion_type[i] in ["data_freshness"]:
                 # Format max_age nicely for display
                 max_age = value.get("max_age")
@@ -19595,6 +19628,22 @@ def _create_autobrief_or_failure_text(
             n_rows=n_rows,
         )
 
+    if assertion_type == "col_pct_missing":
+        return _create_text_col_pct_missing(
+            lang=lang,
+            column=column,
+            value=values,
+            for_failure=for_failure,
+            locale=locale if locale else lang,
+        )
+
+    if assertion_type == "col_missing_coded":
+        return _create_text_col_missing_coded(
+            lang=lang,
+            column=column,
+            for_failure=for_failure,
+        )
+
     if assertion_type == "conjointly":
         return _create_text_conjointly(lang=lang, for_failure=for_failure)
 

From 81ef682e9a3949a8aafcdc857a569c1441f129ab Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:00:03 -0400
Subject: [PATCH 05/55] Add the MissingSpec class

---
 pointblank/missing.py | 242 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 242 insertions(+)
 create mode 100644 pointblank/missing.py

diff --git a/pointblank/missing.py b/pointblank/missing.py
new file mode 100644
index 000000000..04f3150a6
--- /dev/null
+++ b/pointblank/missing.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+__all__ = [
+    "MissingSpec",
+]
+
+
+@dataclass
+class MissingSpec:
+    """
+    Specification for structured missing values in a column.
+
+    Real-world data rarely encodes missingness as a single `null` value. Survey data distinguishes
+    *refused* from *don't know* from *not applicable*; clinical data uses codes like `"NOT DONE"`;
+    statistical packages use sentinel values such as `-99`, `".A"`, or `""`. A `MissingSpec`
+    captures these sentinel values, the *reason* each one represents, and how they should be
+    handled during validation and analysis.
+
+    This brings the idea of *structured missingness* (a missing value carries a reason for its
+    absence) into Pointblank's runtime validation layer. Once defined, a `MissingSpec` can be
+    passed to validation methods (via `missing=`) to automatically exclude sentinel values from
+    constraint checks, or used with dedicated methods like
+    [`Validate.col_missing_coded()`](`pointblank.Validate.col_missing_coded`) and
+    [`Validate.col_pct_missing()`](`pointblank.Validate.col_pct_missing`).
+
+    Parameters
+    ----------
+    reasons
+        A dictionary mapping sentinel values to reason labels. Keys are the actual values present
+        in the data (e.g., `-99`, `"NA"`, `".A"`). Values are human-readable reason identifiers
+        (e.g., `"refused"`, `"not_asked"`).
+    categories
+        Optional grouping of reasons into categories (e.g., an `"item_nonresponse"` category that
+        groups `"refused"` and `"dont_know"`). Useful for aggregate reporting and for checking
+        missingness rates by category. Each value is a list of reason labels that appear in
+        `reasons`. Default is `None`.
+    null_is_missing
+        Whether actual null/`None`/`NaN` values should also be treated as missing (with reason
+        given by `null_reason`). Default is `True`.
+    null_reason
+        The reason label assigned to actual null values when `null_is_missing=True`. Default is
+        `"unknown"`.
+    description
+        Optional human-readable description of the overall missingness pattern. Default is `None`.
+
+    Returns
+    -------
+    MissingSpec
+        A missing-value specification that can be attached to a `Field` (via `missing=`) or passed
+        to validation methods.
+
+    Examples
+    --------
+    Define the missing-value codes for a survey `age` variable:
+
+    ```python
+    import pointblank as pb
+
+    age_missing = pb.MissingSpec(
+        reasons={
+            -99: "not_asked",       # Question wasn't asked to this participant
+            -98: "refused",         # Participant declined to answer
+            -97: "dont_know",       # Participant didn't know
+            -96: "not_applicable",  # Question doesn't apply
+        },
+        categories={
+            "item_nonresponse": ["refused", "dont_know"],
+            "design": ["not_asked", "not_applicable"],
+        },
+    )
+    ```
+
+    The spec can then answer questions about its own structure:
+
+    ```python
+    age_missing.sentinel_values()              # [-99, -98, -97, -96]
+    age_missing.reason_for(-98)                # "refused"
+    age_missing.values_for_reason("refused")   # [-98]
+    age_missing.values_for_category("item_nonresponse")  # [-98, -97]
+    ```
+    """
+
+    reasons: dict[Any, str]
+    categories: dict[str, list[str]] | None = None
+    null_is_missing: bool = True
+    null_reason: str = "unknown"
+    description: str | None = field(default=None)
+
+    def __post_init__(self) -> None:
+        self._validate()
+
+    def _validate(self) -> None:
+        """Validate that the missing specification is internally consistent."""
+        if not isinstance(self.reasons, dict):
+            raise TypeError(
+                f"reasons must be a dict mapping sentinel values to reason labels, "
+                f"got {type(self.reasons).__name__}"
+            )
+
+        if len(self.reasons) == 0 and not self.null_is_missing:
+            raise ValueError(
+                "A MissingSpec must define at least one sentinel value in `reasons`, "
+                "or set `null_is_missing=True`."
+            )
+
+        for value, reason in self.reasons.items():
+            if not isinstance(reason, str):
+                raise TypeError(
+                    f"Reason labels must be strings, got {type(reason).__name__} "
+                    f"for sentinel value {value!r}."
+                )
+
+        if not isinstance(self.null_reason, str):
+            raise TypeError(
+                f"null_reason must be a string, got {type(self.null_reason).__name__}."
+            )
+
+        if self.categories is not None:
+            if not isinstance(self.categories, dict):
+                raise TypeError(
+                    f"categories must be a dict mapping category names to lists of reason "
+                    f"labels, got {type(self.categories).__name__}."
+                )
+
+            known_reasons = set(self.reasons.values())
+            if self.null_is_missing:
+                known_reasons.add(self.null_reason)
+
+            for category, reason_list in self.categories.items():
+                if not isinstance(reason_list, (list, tuple)):
+                    raise TypeError(
+                        f"Category '{category}' must map to a list of reason labels, "
+                        f"got {type(reason_list).__name__}."
+                    )
+                unknown = [r for r in reason_list if r not in known_reasons]
+                if unknown:
+                    raise ValueError(
+                        f"Category '{category}' references unknown reason label(s) {unknown}. "
+                        f"Known reasons are {sorted(known_reasons)}."
+                    )
+
+    def sentinel_values(self) -> list:
+        """Get all sentinel values that encode missingness.
+
+        Returns
+        -------
+        list
+            The keys of `reasons` (the actual values in the data that represent missingness).
+            Note that this does *not* include `None` even when `null_is_missing=True`; use
+            [`is_missing()`](`pointblank.MissingSpec.is_missing`) to test individual values.
+        """
+        return list(self.reasons.keys())
+
+    def reason_for(self, value: Any) -> str | None:
+        """Get the reason label for a specific value.
+
+        Parameters
+        ----------
+        value
+            A value from the data.
+
+        Returns
+        -------
+        str | None
+            The reason label if `value` is a declared sentinel value, `null_reason` if `value`
+            is `None` and `null_is_missing=True`, or `None` if the value is not considered
+            missing.
+        """
+        if value is None:
+            return self.null_reason if self.null_is_missing else None
+        return self.reasons.get(value)
+
+    def is_missing(self, value: Any) -> bool:
+        """Check whether a value should be considered missing under this spec.
+
+        Parameters
+        ----------
+        value
+            A value from the data.
+
+        Returns
+        -------
+        bool
+            `True` if `value` is a declared sentinel value, or if `value` is `None` and
+            `null_is_missing=True`.
+        """
+        if value is None:
+            return self.null_is_missing
+        return value in self.reasons
+
+    def values_for_reason(self, reason: str) -> list:
+        """Get all sentinel values that correspond to a given reason.
+
+        Parameters
+        ----------
+        reason
+            A reason label.
+
+        Returns
+        -------
+        list
+            All sentinel values mapped to `reason`.
+        """
+        return [v for v, r in self.reasons.items() if r == reason]
+
+    def values_for_category(self, category: str) -> list:
+        """Get all sentinel values whose reason falls in a given category.
+
+        Parameters
+        ----------
+        category
+            A category name defined in `categories`.
+
+        Returns
+        -------
+        list
+            All sentinel values whose reason label is in the given category. Returns an empty
+            list if `categories` is `None` or the category is undefined.
+        """
+        if self.categories is None:
+            return []
+        reasons_in_cat = self.categories.get(category, [])
+        return [v for v, r in self.reasons.items() if r in reasons_in_cat]
+
+    def reasons_list(self) -> list[str]:
+        """Get the distinct reason labels defined by this spec.
+
+        Returns
+        -------
+        list[str]
+            The distinct reason labels (in first-seen order), including `null_reason` when
+            `null_is_missing=True`.
+        """
+        seen: dict[str, None] = {}
+        for r in self.reasons.values():
+            seen.setdefault(r, None)
+        if self.null_is_missing:
+            seen.setdefault(self.null_reason, None)
+        return list(seen.keys())

From 6a8439aac27678d088b70348eca961976da4f8a4 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:00:07 -0400
Subject: [PATCH 06/55] Export MissingSpec from pointblank/__init__.py

---
 pointblank/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pointblank/__init__.py b/pointblank/__init__.py
index e8d37b872..231e8233f 100644
--- a/pointblank/__init__.py
+++ b/pointblank/__init__.py
@@ -57,6 +57,7 @@
 from pointblank.generate.base import GeneratorConfig
 from pointblank.inspect import has_columns, has_rows
 from pointblank.integrations.otel import emit_otel
+from pointblank.missing import MissingSpec
 from pointblank.metadata import (
     ADaMDatasetTemplate,
     ADaMVariableSpec,
@@ -120,6 +121,7 @@
     "PipelineResult",
     "DataScan",
     "DraftValidation",
+    "MissingSpec",
     "col",
     "ref",
     "expr_col",

From 97c39b9dc16e5baec6e4e66c57fba87ae71820a2 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:00:45 -0400
Subject: [PATCH 07/55] Add compatible dtypes for missing_coded

---
 pointblank/_constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index 204ef412e..287e97e8d 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -21,6 +21,7 @@
     "within_spec": ["str"],
     "null": ["str", "numeric", "bool", "datetime", "duration"],
     "not_null": ["str", "numeric", "bool", "datetime", "duration"],
+    "missing_coded": ["str", "numeric", "bool", "datetime", "duration"],
 }
 
 ASSERTION_TYPE_METHOD_MAP: dict[str, str] = {

From 4a9a8d65565b6fccf0c2fdfa77d775122ba96afe Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:01:13 -0400
Subject: [PATCH 08/55] Add to assertion-type/method map

---
 pointblank/_constants.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index 287e97e8d..36b477237 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -26,6 +26,8 @@
 
 ASSERTION_TYPE_METHOD_MAP: dict[str, str] = {
     "col_pct_null": "pct_null",
+    "col_pct_missing": "pct_missing",
+    "col_missing_coded": "missing_coded",
     "col_vals_gt": "gt",
     "col_vals_lt": "lt",
     "col_vals_eq": "eq",

From 5c8f6a728689c8f6785a162dc3100c656edef8c5 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:01:32 -0400
Subject: [PATCH 09/55] Declare col_missing_coded() as row-based

---
 pointblank/_constants.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index 36b477237..78b6b6163 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -94,6 +94,7 @@
     "col_vals_decreasing",
     "col_vals_null",
     "col_vals_not_null",
+    "col_missing_coded",
     "col_vals_expr",
     "conjointly",
     "prompt",

From 0cb11a4a6d04f3a8f206b09aef85a1d5defb8070 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:01:46 -0400
Subject: [PATCH 10/55] Add icons for reporting outputs

---
 pointblank/_constants.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index 78b6b6163..8e644d7f3 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -644,6 +644,18 @@
             </g>
         </g>
     </g>
+</svg>""",
+    "col_pct_missing": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" style="background: #FFFFFF;">
+    <title>pct_missing</title>
+    <g id="Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="col_pct_missing" transform="translate(1.000000, 1.581717)" fill-rule="nonzero">
+            <path d="M55,0 C57.4852813,0 59.7352813,1.00735931 61.363961,2.63603897 C62.9926407,4.26471863 64,6.51471863 64,9 L64,9 L64,64 L9,64 C6.51471862,64 4.26471862,62.9926407 2.63603897,61.363961 C1.00735931,59.7352814 0,57.4852814 0,55 L0,55 L0,9 C0,6.51471863 1.00735931,4.26471863 2.63603897,2.63603897 C4.26471862,1.00735931 6.51471862,0 9,0 L9,0 L55,0 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <g id="percent" transform="translate(11.268508, 23.854373)" fill="#000000">
+                <path d="M1.89920553,17.2037988 C1.78720553,17.1237988 1.664,17.0296274 1.584,16.8936274 C1.504,16.7576274 1.464,16.6096274 1.464,16.4496274 C1.464,16.2416274 1.536,16.0336274 1.68,15.8256274 L12.24,0.489627434 C12.432,0.185627434 12.5942662,0 12.9462662,0 C13.1542662,0 13.472,0.089627434 13.68,0.201627434 C14.048,0.425627434 14.232,0.681627434 14.232,0.969627434 C14.232,1.14562743 14.16,1.34562743 14.016,1.56962743 L3.432,16.9776274 C3.176,17.2816274 2.88,17.4336274 2.544,17.4336274 C2.336,17.4336274 2.13920553,17.3477988 1.89920553,17.2037988 Z M3.864,7.47362743 C3.176,7.47362743 2.536,7.30562743 1.944,6.96962743 C1.352,6.63362743 0.88,6.18162743 0.528,5.61362743 C0.176,5.04562743 0,4.42562743 0,3.75362743 C0,3.08162743 0.172,2.46162743 0.516,1.89362743 C0.86,1.32562743 1.328,0.877627434 1.92,0.549627434 C2.512,0.221627434 3.16,0.057627434 3.864,0.057627434 C4.568,0.057627434 5.216,0.221627434 5.808,0.549627434 C6.4,0.877627434 6.864,1.32562743 7.2,1.89362743 C7.536,2.46162743 7.704,3.08162743 7.704,3.75362743 C7.704,4.42562743 7.532,5.04562743 7.188,5.61362743 C6.844,6.18162743 6.38,6.63362743 5.796,6.96962743 C5.212,7.30562743 4.568,7.47362743 3.864,7.47362743 Z M3.864,5.69762743 C4.408,5.69762743 4.852,5.51362743 5.196,5.14562743 C5.54,4.77762743 5.712,4.31362743 5.712,3.75362743 C5.712,3.17762743 5.54,2.70562743 5.196,2.33762743 C4.852,1.96962743 4.408,1.78562743 3.864,1.78562743 C3.304,1.78562743 2.848,1.96962743 2.496,2.33762743 C2.144,2.70562743 1.968,3.17762743 1.968,3.75362743 C1.968,4.31362743 2.144,4.77762743 2.496,5.14562743 C2.848,5.51362743 3.304,5.69762743 3.864,5.69762743 Z M11.952,17.3856274 C11.248,17.3856274 10.6,17.2176274 10.008,16.8816274 C9.416,16.5456274 8.948,16.0936274 8.604,15.5256274 C8.26,14.9576274 8.088,14.3376274 8.088,13.6656274 C8.088,12.9936274 8.26,12.3736274 8.604,11.8056274 C8.948,11.2376274 9.416,10.7896274 10.008,10.4616274 C10.6,10.1336274 11.248,9.96962743 11.952,9.96962743 C12.656,9.96962743 
13.3,10.1336274 13.884,10.4616274 C14.468,10.7896274 14.928,11.2376274 15.264,11.8056274 C15.6,12.3736274 15.768,12.9936274 15.768,13.6656274 C15.768,14.3376274 15.596,14.9576274 15.252,15.5256274 C14.908,16.0936274 14.444,16.5456274 13.86,16.8816274 C13.276,17.2176274 12.64,17.3856274 11.952,17.3856274 Z M11.952,15.6096274 C12.48,15.6096274 12.92,15.4296274 13.272,15.0696274 C13.624,14.7096274 13.8,14.2416274 13.8,13.6656274 C13.8,13.1056274 13.624,12.6416274 13.272,12.2736274 C12.92,11.9056274 12.48,11.7216274 11.952,11.7216274 C11.392,11.7216274 10.932,11.9056274 10.572,12.2736274 C10.212,12.6416274 10.032,13.1056274 10.032,13.6656274 C10.032,14.2416274 10.212,14.7096274 10.572,15.0696274 C10.932,15.4296274 11.392,15.6096274 11.952,15.6096274 Z"></path>
+            </g>
+        </g>
+    </g>
 </svg>""",
     "col_vals_not_null": """<?xml version="1.0" encoding="UTF-8"?>
 <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">

From 30fb8853a1d12eb7654d24aa31d503d0c4afc5c7 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:01:54 -0400
Subject: [PATCH 11/55] Update _constants.py

---
 pointblank/_constants.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index 8e644d7f3..d898a7495 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -668,6 +668,16 @@
             <polygon id="line_white" fill="#FFFFFF" transform="translate(34.899496, 32.153303) rotate(-320.000000) translate(-34.899496, -32.153303) " points="34.3994962 8.54160469 35.3994962 8.54160469 35.3994962 55.7650019 34.3994962 55.7650019"></polygon>
         </g>
     </g>
+</svg>""",
+    "col_missing_coded": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>col_missing_coded</title>
+    <g id="Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="col_missing_coded" transform="translate(0.000000, 0.551724)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <path d="M40.6120805,47.037834 C37.4692348,47.037834 35.0126139,45.9348613 33.712234,44.0140597 C32.4118541,45.9348613 29.9552331,47.037834 26.8123883,47.037834 C22.6574397,47.037834 16.0646712,43.4437723 16.0646712,33.8021619 C16.0646712,29.3401361 17.4715879,18.962166 30.5035862,18.962166 C30.9454018,18.962166 31.3057481,19.3225124 31.3057481,19.7643279 L31.3057481,21.3686518 C31.3057481,21.8104674 30.9454018,22.1708138 30.5035862,22.1708138 C26.6400486,22.1708138 22.4819668,25.8118774 22.4819668,33.8021619 C22.4819668,37.5090277 23.7635456,43.0270243 27.2949384,43.0270243 C29.795428,43.0270243 31.224279,40.4231312 32.0985095,38.2861221 C30.5067194,35.6101596 29.7014243,33.1034035 29.7014243,30.8347892 C29.7014243,25.6238707 31.8603677,23.7751377 33.712234,23.7751377 C35.5641002,23.7751377 37.7230437,25.6238707 37.7230437,30.8347892 C37.7230437,33.1347383 36.9396828,35.5788255 35.3290916,38.2861221 C36.6294715,41.4321009 38.243196,43.0270243 40.1295295,43.0270243 C43.6609223,43.0270243 44.9425012,37.5090277 44.9425012,33.8021619 C44.9425012,25.8118774 40.7844193,22.1708138 36.9208817,22.1708138 C36.4759329,22.1708138 36.1187198,21.8104674 36.1187198,21.3686518 L36.1187198,19.7643279 C36.1187198,19.3225124 36.4759329,18.962166 36.9208817,18.962166 C49.9528801,18.962166 51.3597967,29.3401361 51.3597967,33.8021619 C51.3597967,43.4437723 44.7670282,47.037834 40.6120805,47.037834 Z" id="omega" fill="#000000" fill-rule="nonzero"></path>
+        </g>
+    </g>
 </svg>""",
     "col_vals_regex": """<?xml version="1.0" encoding="UTF-8"?>
 <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">

From cdfdc457d2ec638f2ef8ecb1ab4151c57fd9eb61 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:14 -0400
Subject: [PATCH 12/55] Add translations for missing validations

---
 pointblank/_constants_translations.py | 168 ++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)

diff --git a/pointblank/_constants_translations.py b/pointblank/_constants_translations.py
index cb968d0ff..920d494e8 100644
--- a/pointblank/_constants_translations.py
+++ b/pointblank/_constants_translations.py
@@ -1049,6 +1049,174 @@
         "th": "เปอร์เซ็นต์ของค่า null ใน {column_text} ไม่อยู่ภายใน [{lower}%, {upper}%]",
         "fa": "درصد مقادیر null در {column_text} در محدوده [{lower}%, {upper}%] نبود.",
     },
+    "col_pct_missing_expectation_text": {
+        "en": "Expect that the percentage of missing values in {column_text} is at most {max_pct}%.",
+        "fr": "On s'attend à ce que le pourcentage de valeurs manquantes dans {column_text} soit d'au plus {max_pct}%.",
+        "de": "Erwarten Sie, dass der Prozentsatz der fehlenden Werte in {column_text} höchstens {max_pct}% beträgt.",
+        "it": "Aspettatevi che la percentuale di valori mancanti in {column_text} sia al massimo {max_pct}%.",
+        "es": "Se espera que el porcentaje de valores faltantes en {column_text} sea como máximo {max_pct}%.",
+        "pt": "Espera-se que a porcentagem de valores ausentes em {column_text} seja no máximo {max_pct}%.",
+        "ro": "Se așteaptă ca procentul valorilor lipsă în {column_text} să fie cel mult {max_pct}%.",
+        "tr": "{column_text} içindeki eksik değerlerin yüzdesinin en fazla {max_pct}% olmasını bekleyin.",
+        "zh-Hans": "预期{column_text}中缺失值的百分比最多为{max_pct}%。",
+        "zh-Hant": "{column_text}中缺失值的百分比應最多為{max_pct}%。",
+        "ja": "{column_text}の欠損値の割合が最大{max_pct}%であることを期待します。",
+        "ko": "{column_text}의 결측값 비율이 최대 {max_pct}%이어야 합니다.",
+        "vi": "Kỳ vọng tỷ lệ phần trăm giá trị thiếu trong {column_text} tối đa là {max_pct}%.",
+        "ru": "Ожидается, что процент отсутствующих значений в {column_text} составит не более {max_pct}%.",
+        "cs": "Očekává se, že procento chybějících hodnot ve sloupci {column_text} bude nejvýše {max_pct}%.",
+        "pl": "Oczekuje się, że procent brakujących wartości w {column_text} wyniesie co najwyżej {max_pct}%.",
+        "da": "Forvent, at procentdelen af manglende værdier i {column_text} højst er {max_pct}%.",
+        "sv": "Förvänta dig att andelen saknade värden i {column_text} är högst {max_pct}%.",
+        "nb": "Forvent at prosentandelen av manglende verdier i {column_text} er høyst {max_pct}%.",
+        "nl": "Verwacht dat het percentage ontbrekende waarden in {column_text} hoogstens {max_pct}% is.",
+        "fi": "Odota, että puuttuvien arvojen prosenttiosuus sarakkeessa {column_text} on enintään {max_pct}%.",
+        "is": "Væntir þess að hlutfall vantandi gilda í {column_text} sé í mesta lagi {max_pct}%.",
+        "ar": "توقع أن تكون نسبة القيم المفقودة في {column_text} {max_pct}% على الأكثر.",
+        "hi": "अपेक्षा है कि {column_text} में अनुपस्थित मानों का प्रतिशत अधिकतम {max_pct}% होना चाहिए।",
+        "el": "Αναμένεται το ποσοστό των ελλιπών τιμών στη στήλη {column_text} να είναι το πολύ {max_pct}%.",
+        "id": "Mengharapkan bahwa persentase nilai yang hilang dalam {column_text} paling banyak {max_pct}%.",
+        "uk": "Очікується, що відсоток відсутніх значень в {column_text} становитиме не більше {max_pct}%.",
+        "bg": "Очаква се процентът на липсващите стойности в {column_text} да бъде най-много {max_pct}%.",
+        "hr": "Očekuje se da postotak nedostajućih vrijednosti u {column_text} bude najviše {max_pct}%.",
+        "et": "Eeldatakse, et puuduvate väärtuste protsent veerus {column_text} on kõige rohkem {max_pct}%.",
+        "hu": "Elvárás, hogy a hiányzó értékek aránya a {column_text} oszlopban legfeljebb {max_pct}% legyen.",
+        "ga": "Táthar ag súil go mbeadh céatadán na luachanna ar iarraidh i {column_text} ar a mhéad {max_pct}%.",
+        "lv": "Tiek sagaidīts, ka trūkstošo vērtību procents {column_text} būs ne vairāk kā {max_pct}%.",
+        "lt": "Tikimasi, kad trūkstamų reikšmių procentas stulpelyje {column_text} bus ne daugiau kaip {max_pct}%.",
+        "mt": "Mistenni li l-perċentwal ta' valuri nieqsa f'{column_text} huwa l-aktar {max_pct}%.",
+        "sk": "Očakáva sa, že percento chýbajúcich hodnôt v {column_text} bude najviac {max_pct}%.",
+        "sl": "Pričakuje se, da bo odstotek manjkajočih vrednosti v {column_text} največ {max_pct}%.",
+        "he": "צפוי שאחוז הערכים החסרים ב{column_text} יהיה לכל היותר {max_pct}%.",
+        "th": "คาดหวังว่าเปอร์เซ็นต์ของค่าที่หายไปใน {column_text} จะไม่เกิน {max_pct}%",
+        "fa": "انتظار می‌رود که درصد مقادیر مفقود در {column_text} حداکثر {max_pct}% باشد.",
+    },
+    "col_pct_missing_failure_text": {
+        "en": "The percentage of missing values in {column_text} exceeded {max_pct}%.",
+        "fr": "Le pourcentage de valeurs manquantes dans {column_text} a dépassé {max_pct}%.",
+        "de": "Der Prozentsatz der fehlenden Werte in {column_text} überschritt {max_pct}%.",
+        "it": "La percentuale di valori mancanti in {column_text} ha superato {max_pct}%.",
+        "es": "El porcentaje de valores faltantes en {column_text} superó {max_pct}%.",
+        "pt": "A porcentagem de valores ausentes em {column_text} excedeu {max_pct}%.",
+        "ro": "Procentul valorilor lipsă în {column_text} a depășit {max_pct}%.",
+        "tr": "{column_text} içindeki eksik değerlerin yüzdesi {max_pct}% değerini aştı.",
+        "zh-Hans": "{column_text}中缺失值的百分比超过了{max_pct}%。",
+        "zh-Hant": "{column_text}中缺失值的百分比超過了{max_pct}%。",
+        "ja": "{column_text}の欠損値の割合が{max_pct}%を超えました。",
+        "ko": "{column_text}의 결측값 비율이 {max_pct}%를 초과했습니다.",
+        "vi": "Tỷ lệ phần trăm giá trị thiếu trong {column_text} đã vượt quá {max_pct}%.",
+        "ru": "Процент отсутствующих значений в {column_text} превысил {max_pct}%.",
+        "cs": "Procento chybějících hodnot ve sloupci {column_text} překročilo {max_pct}%.",
+        "pl": "Procent brakujących wartości w {column_text} przekroczył {max_pct}%.",
+        "da": "Procentdelen af manglende værdier i {column_text} oversteg {max_pct}%.",
+        "sv": "Andelen saknade värden i {column_text} översteg {max_pct}%.",
+        "nb": "Prosentandelen av manglende verdier i {column_text} oversteg {max_pct}%.",
+        "nl": "Het percentage ontbrekende waarden in {column_text} overschreed {max_pct}%.",
+        "fi": "Puuttuvien arvojen prosenttiosuus sarakkeessa {column_text} ylitti {max_pct}%.",
+        "is": "Hlutfall vantandi gilda í {column_text} fór yfir {max_pct}%.",
+        "ar": "تجاوزت نسبة القيم المفقودة في {column_text} {max_pct}%.",
+        "hi": "{column_text} में अनुपस्थित मानों का प्रतिशत {max_pct}% से अधिक था।",
+        "el": "Το ποσοστό των ελλιπών τιμών στη στήλη {column_text} ξεπέρασε {max_pct}%.",
+        "id": "Persentase nilai yang hilang dalam {column_text} melebihi {max_pct}%.",
+        "uk": "Відсоток відсутніх значень в {column_text} перевищив {max_pct}%.",
+        "bg": "Процентът на липсващите стойности в {column_text} надхвърли {max_pct}%.",
+        "hr": "Postotak nedostajućih vrijednosti u {column_text} premašio je {max_pct}%.",
+        "et": "Puuduvate väärtuste protsent veerus {column_text} ületas {max_pct}%.",
+        "hu": "A hiányzó értékek aránya a {column_text} oszlopban meghaladta a {max_pct}%-ot.",
+        "ga": "Sháraigh céatadán na luachanna ar iarraidh i {column_text} {max_pct}%.",
+        "lv": "Trūkstošo vērtību procents {column_text} pārsniedza {max_pct}%.",
+        "lt": "Trūkstamų reikšmių procentas stulpelyje {column_text} viršijo {max_pct}%.",
+        "mt": "Il-perċentwal ta' valuri nieqsa f'{column_text} qabeż {max_pct}%.",
+        "sk": "Percento chýbajúcich hodnôt v {column_text} prekročilo {max_pct}%.",
+        "sl": "Odstotek manjkajočih vrednosti v {column_text} je presegel {max_pct}%.",
+        "he": "אחוז הערכים החסרים ב{column_text} חרג מ-{max_pct}%.",
+        "th": "เปอร์เซ็นต์ของค่าที่หายไปใน {column_text} เกิน {max_pct}%",
+        "fa": "درصد مقادیر مفقود در {column_text} از {max_pct}% فراتر رفت.",
+    },
+    "col_missing_coded_expectation_text": {
+        "en": "Expect that all missing values in {column_text} are coded (no uncoded Null values).",
+        "fr": "On s'attend à ce que toutes les valeurs manquantes dans {column_text} soient codées (aucune valeur nulle non codée).",
+        "de": "Erwarten Sie, dass alle fehlenden Werte in {column_text} kodiert sind (keine unkodierten Nullwerte).",
+        "it": "Aspettatevi che tutti i valori mancanti in {column_text} siano codificati (nessun valore nullo non codificato).",
+        "es": "Se espera que todos los valores faltantes en {column_text} estén codificados (sin valores nulos no codificados).",
+        "pt": "Espera-se que todos os valores ausentes em {column_text} estejam codificados (sem valores nulos não codificados).",
+        "ro": "Se așteaptă ca toate valorile lipsă în {column_text} să fie codificate (fără valori nule necodificate).",
+        "tr": "{column_text} içindeki tüm eksik değerlerin kodlanmış olmasını bekleyin (kodlanmamış boş değer yok).",
+        "zh-Hans": "预期{column_text}中所有缺失值都已编码（没有未编码的空值）。",
+        "zh-Hant": "{column_text}中所有缺失值都應已編碼（沒有未編碼的空值）。",
+        "ja": "{column_text}のすべての欠損値がコード化されていることを期待します（コード化されていないnull値がない）。",
+        "ko": "{column_text}의 모든 결측값이 코드화되어 있어야 합니다(코드화되지 않은 null 값 없음).",
+        "vi": "Kỳ vọng tất cả giá trị thiếu trong {column_text} đều được mã hóa (không có giá trị null chưa mã hóa).",
+        "ru": "Ожидается, что все отсутствующие значения в {column_text} закодированы (нет незакодированных нулевых значений).",
+        "cs": "Očekává se, že všechny chybějící hodnoty ve sloupci {column_text} jsou zakódované (žádné nezakódované null hodnoty).",
+        "pl": "Oczekuje się, że wszystkie brakujące wartości w {column_text} są zakodowane (brak niezakodowanych wartości null).",
+        "da": "Forvent, at alle manglende værdier i {column_text} er kodede (ingen ukodede null-værdier).",
+        "sv": "Förvänta dig att alla saknade värden i {column_text} är kodade (inga okodade null-värden).",
+        "nb": "Forvent at alle manglende verdier i {column_text} er kodet (ingen ukodede null-verdier).",
+        "nl": "Verwacht dat alle ontbrekende waarden in {column_text} gecodeerd zijn (geen ongecodeerde null-waarden).",
+        "fi": "Odota, että kaikki puuttuvat arvot sarakkeessa {column_text} on koodattu (ei koodaamattomia null-arvoja).",
+        "is": "Væntir þess að öll vantandi gildi í {column_text} séu kóðuð (engin ókóðuð null-gildi).",
+        "ar": "توقع أن تكون جميع القيم المفقودة في {column_text} مرمّزة (لا توجد قيم فارغة غير مرمّزة).",
+        "hi": "अपेक्षा है कि {column_text} में सभी अनुपस्थित मान कोडित हों (कोई बिना कोडित null मान नहीं)।",
+        "el": "Αναμένεται όλες οι ελλιπείς τιμές στη στήλη {column_text} να είναι κωδικοποιημένες (καμία μη κωδικοποιημένη null τιμή).",
+        "id": "Mengharapkan bahwa semua nilai yang hilang dalam {column_text} dikodekan (tidak ada nilai null yang tidak dikodekan).",
+        "uk": "Очікується, що всі відсутні значення в {column_text} закодовані (немає незакодованих нульових значень).",
+        "bg": "Очаква се всички липсващи стойности в {column_text} да са кодирани (без некодирани null стойности).",
+        "hr": "Očekuje se da su sve nedostajuće vrijednosti u {column_text} kodirane (bez nekodiranih null vrijednosti).",
+        "et": "Eeldatakse, et kõik puuduvad väärtused veerus {column_text} on kodeeritud (kodeerimata null-väärtusi pole).",
+        "hu": "Elvárás, hogy a {column_text} oszlopban minden hiányzó érték kódolt legyen (nincs kódolatlan null érték).",
+        "ga": "Táthar ag súil go mbeadh gach luach ar iarraidh i {column_text} códaithe (gan aon luachanna null gan chódú).",
+        "lv": "Tiek sagaidīts, ka visas trūkstošās vērtības {column_text} ir kodētas (nav nekodētu null vērtību).",
+        "lt": "Tikimasi, kad visos trūkstamos reikšmės stulpelyje {column_text} yra užkoduotos (nėra neužkoduotų null reikšmių).",
+        "mt": "Mistenni li l-valuri nieqsa kollha f'{column_text} huma kodifikati (l-ebda valuri null mhux kodifikati).",
+        "sk": "Očakáva sa, že všetky chýbajúce hodnoty v {column_text} sú zakódované (žiadne nezakódované null hodnoty).",
+        "sl": "Pričakuje se, da so vse manjkajoče vrednosti v {column_text} kodirane (brez nekodiranih null vrednosti).",
+        "he": "צפוי שכל הערכים החסרים ב{column_text} יהיו מקודדים (אין ערכי null לא מקודדים).",
+        "th": "คาดหวังว่าค่าที่หายไปทั้งหมดใน {column_text} จะถูกเข้ารหัส (ไม่มีค่า null ที่ไม่ได้เข้ารหัส)",
+        "fa": "انتظار می‌رود که همه مقادیر مفقود در {column_text} کدگذاری شده باشند (هیچ مقدار null کدگذاری‌نشده‌ای وجود نداشته باشد).",
+    },
+    "col_missing_coded_failure_text": {
+        "en": "Uncoded missing values (raw Null values) were present in {column_text}.",
+        "fr": "Des valeurs manquantes non codées (valeurs nulles brutes) étaient présentes dans {column_text}.",
+        "de": "Unkodierte fehlende Werte (rohe Nullwerte) waren in {column_text} vorhanden.",
+        "it": "Erano presenti valori mancanti non codificati (valori nulli grezzi) in {column_text}.",
+        "es": "Había valores faltantes no codificados (valores nulos sin procesar) en {column_text}.",
+        "pt": "Havia valores ausentes não codificados (valores nulos brutos) em {column_text}.",
+        "ro": "Valori lipsă necodificate (valori nule brute) au fost prezente în {column_text}.",
+        "tr": "{column_text} içinde kodlanmamış eksik değerler (ham boş değerler) mevcuttu.",
+        "zh-Hans": "{column_text}中存在未编码的缺失值（原始空值）。",
+        "zh-Hant": "{column_text}中存在未編碼的缺失值（原始空值）。",
+        "ja": "{column_text}にコード化されていない欠損値（生のnull値）が存在しました。",
+        "ko": "{column_text}에 코드화되지 않은 결측값(원시 null 값)이 있었습니다.",
+        "vi": "Có giá trị thiếu chưa mã hóa (giá trị null thô) trong {column_text}.",
+        "ru": "В {column_text} присутствовали незакодированные отсутствующие значения (необработанные нулевые значения).",
+        "cs": "Ve sloupci {column_text} byly přítomny nezakódované chybějící hodnoty (surové null hodnoty).",
+        "pl": "W {column_text} obecne były niezakodowane brakujące wartości (surowe wartości null).",
+        "da": "Ukodede manglende værdier (rå null-værdier) var til stede i {column_text}.",
+        "sv": "Okodade saknade värden (råa null-värden) förekom i {column_text}.",
+        "nb": "Ukodede manglende verdier (rå null-verdier) var til stede i {column_text}.",
+        "nl": "Ongecodeerde ontbrekende waarden (ruwe null-waarden) waren aanwezig in {column_text}.",
+        "fi": "Sarakkeessa {column_text} oli koodaamattomia puuttuvia arvoja (raakoja null-arvoja).",
+        "is": "Ókóðuð vantandi gildi (hrá null-gildi) voru til staðar í {column_text}.",
+        "ar": "كانت هناك قيم مفقودة غير مرمّزة (قيم فارغة خام) في {column_text}.",
+        "hi": "{column_text} में बिना कोडित अनुपस्थित मान (कच्चे null मान) मौजूद थे।",
+        "el": "Μη κωδικοποιημένες ελλιπείς τιμές (ακατέργαστες null τιμές) υπήρχαν στη στήλη {column_text}.",
+        "id": "Nilai yang hilang tidak dikodekan (nilai null mentah) ada dalam {column_text}.",
+        "uk": "У {column_text} були присутні незакодовані відсутні значення (необроблені нульові значення).",
+        "bg": "В {column_text} присъстваха некодирани липсващи стойности (необработени null стойности).",
+        "hr": "U {column_text} bile su prisutne nekodirane nedostajuće vrijednosti (sirove null vrijednosti).",
+        "et": "Veerus {column_text} esinesid kodeerimata puuduvad väärtused (toored null-väärtused).",
+        "hu": "A {column_text} oszlopban kódolatlan hiányzó értékek (nyers null értékek) voltak jelen.",
+        "ga": "Bhí luachanna ar iarraidh gan chódú (luachanna null amha) i láthair i {column_text}.",
+        "lv": "{column_text} bija nekodētas trūkstošās vērtības (neapstrādātas null vērtības).",
+        "lt": "Stulpelyje {column_text} buvo neužkoduotų trūkstamų reikšmių (neapdorotų null reikšmių).",
+        "mt": "Valuri nieqsa mhux kodifikati (valuri null mhux ipproċessati) kienu preżenti f'{column_text}.",
+        "sk": "V {column_text} sa vyskytli nezakódované chýbajúce hodnoty (surové null hodnoty).",
+        "sl": "V {column_text} so bile prisotne nekodirane manjkajoče vrednosti (surove null vrednosti).",
+        "he": "ערכים חסרים לא מקודדים (ערכי null גולמיים) היו נוכחים ב{column_text}.",
+        "th": "มีค่าที่หายไปที่ไม่ได้เข้ารหัส (ค่า null ดิบ) อยู่ใน {column_text}",
+        "fa": "مقادیر مفقود کدگذاری‌نشده (مقادیر null خام) در {column_text} وجود داشت.",
+    },
     "regex_expectation_text": {
         "en": "Expect that values in {column_text} should match the regular expression: {values_text}.",
         "fr": "On s'attend à ce que les valeurs de {column_text} correspondent à l'expression régulière : {values_text}.",

From d855d60cf107ece96193d57bebb26d5aac3472ce Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:32 -0400
Subject: [PATCH 13/55] Add interrogation functions for missing validations

---
 pointblank/_interrogation.py | 59 ++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/pointblank/_interrogation.py b/pointblank/_interrogation.py
index 28cd2bf34..8485ef96e 100644
--- a/pointblank/_interrogation.py
+++ b/pointblank/_interrogation.py
@@ -755,6 +755,52 @@ def col_pct_null(
     return n_null >= (abs_target - lower_bound) and n_null <= (abs_target + upper_bound)
 
 
+def col_pct_missing(
+    data_tbl: IntoFrame,
+    column: str,
+    sentinels: list,
+    count_null: bool,
+    max_pct: float,
+) -> bool:
+    """Report whether the share of missing values in `column` stays at or below `max_pct`.
+
+    A value is missing when it equals one of the `sentinels` or, if `count_null=True`, when it is
+    an actual null. The share is the fraction `n_missing / total_rows`, and that fraction is
+    compared directly with `max_pct`.
+    """
+    frame = nw.from_native(data_tbl)
+
+    # Pick the boolean flag expression that matches the requested kinds of missingness
+    col_expr = nw.col(column)
+    if sentinels and count_null:
+        is_missing = col_expr.is_in(sentinels) | col_expr.is_null()
+    elif sentinels:
+        is_missing = col_expr.is_in(sentinels)
+    elif count_null:
+        is_missing = col_expr.is_null()
+    else:
+        # No sentinels and nulls are not counted, so nothing can be flagged as missing
+        return max_pct >= 0.0
+
+    # Booleans are cast to Int32 before summing because PySpark can't sum booleans
+    n_missing_expr = is_missing.cast(nw.Int32).sum()
+
+    if is_narwhals_lazyframe(frame):
+        stats = frame.select(total_rows=nw.len(), n_missing=n_missing_expr).collect()
+        total_rows: int = int(stats["total_rows"][0])
+        n_missing: int = int(stats["n_missing"][0])
+    else:
+        assert is_narwhals_dataframe(frame)
+        total_rows = int(frame.select(nw.len()).item())
+        n_missing = int(frame.select(n_missing_expr).item())
+
+    # A table without rows passes (this also avoids dividing by zero below)
+    if total_rows == 0:
+        return True
+
+    return n_missing / total_rows <= max_pct
+
+
 def col_count_match(data_tbl: IntoFrame, count: Any, inverse: bool) -> bool:
     """
     Check if DataFrame column count matches expected count.
@@ -2534,6 +2580,19 @@ def interrogate_not_null(tbl: IntoFrame, column: str) -> Any:
     return result_tbl.to_native()
 
 
+def interrogate_missing_coded(tbl: IntoFrame, column: str) -> Any:
+    """Flag each row of `column` as passing unless its value is a raw null.
+
+    In the structured-missingness model every absence is meant to carry an explicit sentinel code,
+    which is a non-null value. A raw null is therefore *uncoded* missingness and fails its test
+    unit; the per-row verdict is written to the `pb_is_good_` column.
+    """
+    frame = nw.from_native(tbl)
+    assert isinstance(frame, (nw.DataFrame, nw.LazyFrame))
+    is_coded = ~nw.col(column).is_null()
+    return frame.with_columns(pb_is_good_=is_coded).to_native()
+
+
 def interrogate_increasing(
     tbl: IntoFrame, column: str, allow_stationary: bool, decreasing_tol: float, na_pass: bool
 ) -> Any:

From 47b23a0ab33728281f393e6b34a2b198bd61951e Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:35 -0400
Subject: [PATCH 14/55] Update validate.pyi

---
 pointblank/validate.pyi | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pointblank/validate.pyi b/pointblank/validate.pyi
index 82a7664de..47124b150 100644
--- a/pointblank/validate.pyi
+++ b/pointblank/validate.pyi
@@ -7,6 +7,7 @@ from pathlib import Path
 from pointblank._typing import SegmentSpec, Tolerance
 from pointblank._utils import _PBUnresolvedColumn
 from pointblank.column import Column, ColumnSelector, ColumnSelectorNarwhals, ReferenceColumn
+from pointblank.missing import MissingSpec
 from pointblank.schema import Schema
 from pointblank.thresholds import Actions, FinalActions, Thresholds
 from typing import Any, Callable, Literal, ParamSpec, TypeVar
@@ -394,6 +395,29 @@ class Validate:
         brief: str | bool | None = None,
         active: bool | Callable = True,
     ) -> Validate: ...
+    def col_pct_missing(  # column-level check: share of structured-missing values <= `max_pct`
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        max_pct: float,
+        reason: str | None = None,
+        category: str | None = None,
+        thresholds: int | float | None | bool | tuple | dict | Thresholds = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate: ...
+    def col_missing_coded(  # row-level check: a raw null (uncoded missingness) fails its row
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate: ...
     def rows_distinct(
         self,
         columns_subset: str | list[str] | None = None,

From c68bc5ecdcbd94f3ab4cc7718936d44d5fac8974 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:43 -0400
Subject: [PATCH 15/55] Create test_col_missing_coded.py

---
 tests/test_col_missing_coded.py | 96 +++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 tests/test_col_missing_coded.py

diff --git a/tests/test_col_missing_coded.py b/tests/test_col_missing_coded.py
new file mode 100644
index 000000000..c4911eaae
--- /dev/null
+++ b/tests/test_col_missing_coded.py
@@ -0,0 +1,96 @@
+import polars as pl
+import pytest
+
+import pointblank as pb
+
+
+@pytest.fixture
+def age_missing():  # spec declaring three sentinel codes and no categories
+    return pb.MissingSpec(reasons={-99: "not_asked", -98: "refused", -97: "dont_know"})
+
+
+class TestColMissingCoded:  # row-level checks: raw nulls fail, sentinel codes pass
+    def test_passes_when_all_coded(self, age_missing):
+        # All absence expressed as sentinels; no raw nulls
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99, 29, -97, 55, 38]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns="age", missing=age_missing)
+            .interrogate()
+        )
+        info = validation.validation_info[0]
+        assert info.n == 8  # one test unit per row
+        assert info.n_failed == 0
+        assert info.all_passed is True
+
+    def test_fails_on_raw_null(self, age_missing):
+        tbl = pl.DataFrame({"age": [34, -98, 41, None, 29, -99, 55, None]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns="age", missing=age_missing)
+            .interrogate()
+        )
+        info = validation.validation_info[0]
+        assert info.n == 8
+        assert info.n_failed == 2  # two raw nulls
+        assert info.all_passed is False
+
+    def test_sentinels_pass(self, age_missing):
+        # column made up entirely of sentinel codes (no real values, no nulls) -> all pass
+        tbl = pl.DataFrame({"age": [-99, -98, -97, -99]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns="age", missing=age_missing)
+            .interrogate()
+        )
+        assert validation.validation_info[0].all_passed is True
+
+    def test_missing_must_be_missingspec(self):
+        tbl = pl.DataFrame({"age": [1, 2, 3]})
+        with pytest.raises(TypeError):  # a plain dict is not a MissingSpec
+            pb.Validate(data=tbl).col_missing_coded(columns="age", missing={-99: "x"})
+
+    def test_multiple_columns(self, age_missing):
+        tbl = pl.DataFrame({"a": [1, None, 3], "b": [-99, 2, 3]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns=["a", "b"], missing=age_missing)
+            .interrogate()
+        )
+        assert len(validation.validation_info) == 2  # one validation step per column
+        assert validation.validation_info[0].n_failed == 1  # column a has a null
+        assert validation.validation_info[1].n_failed == 0  # column b has none
+
+    def test_report_renders_with_brief(self, age_missing):
+        tbl = pl.DataFrame({"age": [34, None, 41]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_coded(columns="age", missing=age_missing, brief=True)
+            .interrogate()
+        )
+        gt = validation.get_tabular_report()  # smoke test: building the report must not raise
+        assert gt is not None
+
+
+class TestAutobriefTranslations:
+    """Exercise the autobrief text builders across languages (no KeyError)."""
+
+    @pytest.mark.parametrize("lang", ["en", "fr", "de", "ja", "ar", "zh-Hans", "fa", "he"])
+    def test_col_missing_coded_brief_langs(self, age_missing, lang):
+        tbl = pl.DataFrame({"age": [34, None, 41]})
+        validation = (
+            pb.Validate(data=tbl, lang=lang)
+            .col_missing_coded(columns="age", missing=age_missing, brief=True)
+            .interrogate()
+        )
+        assert validation.validation_info[0].autobrief  # truthy: a non-empty brief was set
+
+    @pytest.mark.parametrize("lang", ["en", "fr", "de", "ja", "ar", "zh-Hans", "fa", "he"])
+    def test_col_pct_missing_brief_langs(self, age_missing, lang):
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99]})
+        validation = (
+            pb.Validate(data=tbl, lang=lang)
+            .col_pct_missing(columns="age", missing=age_missing, max_pct=0.5, brief=True)
+            .interrogate()
+        )
+        assert validation.validation_info[0].autobrief  # truthy: a non-empty brief was set

From e3351a5d13a7770ee0eee3b0d7a36105f44b5387 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:45 -0400
Subject: [PATCH 16/55] Create test_col_pct_missing.py

---
 tests/test_col_pct_missing.py | 144 ++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 tests/test_col_pct_missing.py

diff --git a/tests/test_col_pct_missing.py b/tests/test_col_pct_missing.py
new file mode 100644
index 000000000..a1c7d4b78
--- /dev/null
+++ b/tests/test_col_pct_missing.py
@@ -0,0 +1,144 @@
+import polars as pl
+import pytest
+
+import pointblank as pb
+
+
+@pytest.fixture
+def survey_tbl():
+    # 8 rows of `age`; the negative values are sentinel codes rather than real ages
+    #  -99 = not_asked, -98 = refused, -97 = dont_know
+    # values: 34, -98, 41, -99, 29, -98, 55, 38
+    #  -> 2 refused, 1 not_asked, 0 dont_know, 5 real -> 3/8 = 0.375 missing
+    return pl.DataFrame({"age": [34, -98, 41, -99, 29, -98, 55, 38]})
+
+
+@pytest.fixture
+def age_missing():  # three sentinel codes, grouped into two categories of reasons
+    return pb.MissingSpec(
+        reasons={-99: "not_asked", -98: "refused", -97: "dont_know"},
+        categories={"item_nonresponse": ["refused", "dont_know"], "design": ["not_asked"]},
+    )
+
+
+def _single_step_passed(validation):
+    # Overall pass/fail outcome of the first validation step recorded on `validation`
+    return validation.validation_info[0].all_passed
+
+
+class TestColPctMissing:  # column-level checks: one pass/fail validation step per column
+    def test_overall_pass(self, survey_tbl, age_missing):
+        validation = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, max_pct=0.5)
+            .interrogate()
+        )
+        assert _single_step_passed(validation) is True
+
+    def test_overall_fail(self, survey_tbl, age_missing):
+        validation = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, max_pct=0.30)
+            .interrogate()
+        )
+        # 3/8 = 0.375 > 0.30 -> fail
+        assert _single_step_passed(validation) is False
+
+    def test_by_reason_refused(self, survey_tbl, age_missing):
+        # 2/8 = 0.25 refused
+        passing = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, reason="refused", max_pct=0.25)
+            .interrogate()
+        )
+        failing = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, reason="refused", max_pct=0.20)
+            .interrogate()
+        )
+        assert _single_step_passed(passing) is True
+        assert _single_step_passed(failing) is False
+
+    def test_by_reason_zero(self, survey_tbl, age_missing):
+        # no dont_know values -> 0% always passes
+        validation = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, reason="dont_know", max_pct=0.0)
+            .interrogate()
+        )
+        assert _single_step_passed(validation) is True
+
+    def test_by_category(self, survey_tbl, age_missing):
+        # item_nonresponse = refused + dont_know = 2/8 = 0.25
+        passing = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(
+                columns="age", missing=age_missing, category="item_nonresponse", max_pct=0.25
+            )
+            .interrogate()
+        )
+        assert _single_step_passed(passing) is True
+
+    def test_nulls_counted(self, age_missing):
+        tbl = pl.DataFrame({"age": [34, None, 41, -98, 29, 38, 55, 38]})
+        # null_is_missing=True by default: 1 null + 1 refused = 2/8 = 0.25 (passes at 0.25).
+        # At 0.20 the step fails only if the null is counted (the sentinel alone is 1/8 = 0.125)
+        for max_pct, expected in [(0.25, True), (0.20, False)]:
+            validation = (
+                pb.Validate(data=tbl)
+                .col_pct_missing(columns="age", missing=age_missing, max_pct=max_pct)
+                .interrogate()
+            )
+            assert _single_step_passed(validation) is expected
+
+    def test_nulls_excluded_when_spec_says_so(self):
+        spec = pb.MissingSpec(reasons={-98: "refused"}, null_is_missing=False)
+        tbl = pl.DataFrame({"age": [34, None, None, -98, 29, 38, 55, 38]})
+        # only -98 counts: 1/8 = 0.125
+        validation = (
+            pb.Validate(data=tbl)
+            .col_pct_missing(columns="age", missing=spec, max_pct=0.125)
+            .interrogate()
+        )
+        assert _single_step_passed(validation) is True
+
+    def test_reason_and_category_mutually_exclusive(self, survey_tbl, age_missing):
+        with pytest.raises(ValueError, match="Only one of"):
+            pb.Validate(data=survey_tbl).col_pct_missing(
+                columns="age",
+                missing=age_missing,
+                reason="refused",
+                category="item_nonresponse",
+                max_pct=0.5,
+            )
+
+    def test_max_pct_bounds(self, survey_tbl, age_missing):
+        with pytest.raises(ValueError, match="max_pct"):
+            pb.Validate(data=survey_tbl).col_pct_missing(
+                columns="age", missing=age_missing, max_pct=1.5
+            )
+
+    def test_missing_must_be_missingspec(self, survey_tbl):
+        with pytest.raises(TypeError):
+            pb.Validate(data=survey_tbl).col_pct_missing(
+                columns="age", missing={-99: "not_asked"}, max_pct=0.5
+            )
+
+    def test_multiple_columns(self, age_missing):
+        tbl = pl.DataFrame({"a": [1, -98, 3, 4], "b": [-99, -99, 3, 4]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_pct_missing(columns=["a", "b"], missing=age_missing, max_pct=0.5)
+            .interrogate()
+        )
+        assert len(validation.validation_info) == 2
+
+    def test_report_renders(self, survey_tbl, age_missing):
+        # The validation report should build without error (exercises icon + value rendering)
+        validation = (
+            pb.Validate(data=survey_tbl)
+            .col_pct_missing(columns="age", missing=age_missing, max_pct=0.5, brief=True)
+            .interrogate()
+        )
+        gt = validation.get_tabular_report()
+        assert gt is not None

From 6e3a5727a8cee672c0511c21bf77ddf65f16cc6a Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:02:48 -0400
Subject: [PATCH 17/55] Create test_missing.py

---
 tests/test_missing.py | 124 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 tests/test_missing.py

diff --git a/tests/test_missing.py b/tests/test_missing.py
new file mode 100644
index 000000000..53b62099a
--- /dev/null
+++ b/tests/test_missing.py
@@ -0,0 +1,124 @@
+import pytest
+
+import pointblank as pb
+from pointblank.missing import MissingSpec
+
+
+class TestMissingSpecConstruction:
+    """Tests for MissingSpec construction and validation."""
+
+    def test_minimal_spec(self):
+        spec = MissingSpec(reasons={-99: "not_asked"})  # every other argument left at its default
+        assert spec.reasons == {-99: "not_asked"}
+        assert spec.categories is None
+        assert spec.null_is_missing is True
+        assert spec.null_reason == "unknown"
+        assert spec.description is None
+
+    def test_full_spec(self):
+        spec = MissingSpec(
+            reasons={-99: "not_asked", -98: "refused", -97: "dont_know"},
+            categories={"item_nonresponse": ["refused", "dont_know"], "design": ["not_asked"]},
+            null_is_missing=False,
+            null_reason="system",
+            description="Standard survey codes",
+        )
+        assert spec.null_is_missing is False
+        assert spec.null_reason == "system"
+        assert spec.description == "Standard survey codes"
+
+    def test_exported_from_top_level(self):
+        assert pb.MissingSpec is MissingSpec  # same class object, not a copy or wrapper
+
+    def test_reasons_must_be_dict(self):
+        with pytest.raises(TypeError):
+            MissingSpec(reasons=[-99, -98])  # type: ignore[arg-type]
+
+    def test_empty_reasons_requires_null_is_missing(self):
+        # OK: empty reasons but null_is_missing=True
+        MissingSpec(reasons={}, null_is_missing=True)
+        # Not OK: empty reasons and null_is_missing=False
+        with pytest.raises(ValueError):
+            MissingSpec(reasons={}, null_is_missing=False)
+
+    def test_reason_labels_must_be_strings(self):
+        with pytest.raises(TypeError):
+            MissingSpec(reasons={-99: 1})  # type: ignore[dict-item]
+
+    def test_category_must_reference_known_reasons(self):
+        with pytest.raises(ValueError, match="unknown reason"):
+            MissingSpec(
+                reasons={-99: "not_asked"},
+                categories={"bad": ["nonexistent"]},
+            )
+
+    def test_category_can_reference_null_reason(self):
+        spec = MissingSpec(
+            reasons={-99: "not_asked"},
+            categories={"all_absent": ["not_asked", "unknown"]},
+            null_is_missing=True,
+        )
+        assert spec.values_for_category("all_absent") == [-99]  # null reason adds no sentinel
+
+    def test_categories_must_be_dict(self):
+        with pytest.raises(TypeError):
+            MissingSpec(reasons={-99: "not_asked"}, categories=["not_asked"])  # type: ignore[arg-type]
+
+
+class TestMissingSpecMethods:  # lookup helpers, exercised on a four-code, two-category spec
+    @pytest.fixture
+    def spec(self):
+        return MissingSpec(
+            reasons={-99: "not_asked", -98: "refused", -97: "dont_know", -96: "not_applicable"},
+            categories={
+                "item_nonresponse": ["refused", "dont_know"],
+                "design": ["not_asked", "not_applicable"],
+            },
+        )
+
+    def test_sentinel_values(self, spec):
+        assert spec.sentinel_values() == [-99, -98, -97, -96]  # same order as declared
+
+    def test_reason_for(self, spec):
+        assert spec.reason_for(-98) == "refused"
+        assert spec.reason_for(5) is None  # 5 is not a declared sentinel
+
+    def test_reason_for_null(self, spec):
+        assert spec.reason_for(None) == "unknown"
+        spec_no_null = MissingSpec(reasons={-99: "not_asked"}, null_is_missing=False)
+        assert spec_no_null.reason_for(None) is None
+
+    def test_is_missing(self, spec):
+        assert spec.is_missing(-99) is True
+        assert spec.is_missing(42) is False
+        assert spec.is_missing(None) is True
+
+    def test_is_missing_null_excluded(self):
+        spec = MissingSpec(reasons={-99: "not_asked"}, null_is_missing=False)
+        assert spec.is_missing(None) is False
+
+    def test_values_for_reason(self, spec):
+        assert spec.values_for_reason("refused") == [-98]
+        assert spec.values_for_reason("nonexistent") == []  # unknown label: empty list, no error
+
+    def test_values_for_category(self, spec):
+        assert spec.values_for_category("item_nonresponse") == [-98, -97]
+        assert spec.values_for_category("design") == [-99, -96]
+        assert spec.values_for_category("nonexistent") == []
+
+    def test_values_for_category_no_categories(self):
+        spec = MissingSpec(reasons={-99: "not_asked"})
+        assert spec.values_for_category("anything") == []
+
+    def test_reasons_list(self, spec):
+        assert spec.reasons_list() == [
+            "not_asked",
+            "refused",
+            "dont_know",
+            "not_applicable",
+            "unknown",  # default null_reason; listed because null_is_missing is True here
+        ]
+
+    def test_reasons_list_no_null(self):
+        spec = MissingSpec(reasons={-99: "a", -98: "b"}, null_is_missing=False)
+        assert spec.reasons_list() == ["a", "b"]

From e1b6faa1d339b3580577ee07e74ce34c5e312d06 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:39:17 -0400
Subject: [PATCH 18/55] Apply structured missingness to validation methods

---
 pointblank/validate.py | 186 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 185 insertions(+), 1 deletion(-)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index f9cc73c7d..78bdeeced 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -61,6 +61,7 @@
 from pointblank._interrogation import (
     NumberOfTestUnits,
     SpeciallyValidation,
+    apply_missing_exclusion,
     col_count_match,
     col_exists,
     col_pct_missing,
@@ -2677,7 +2678,136 @@ def _generate_display_table(
     return gt_tbl
 
 
-def missing_vals_tbl(data: Any) -> GT:
+def _prettify_reason_label(reason: str) -> str:
+    """Return `reason` as a display label: underscores become spaces, then Title Case."""
+    spaced = reason.replace("_", " ")
+    return spaced.title()
+
+
+def _build_structured_missing_tbl(data: Any, missing: dict[str, MissingSpec]) -> GT:
+    """Build a structured-missingness breakdown table (one row per column, columns for the count
+    and percentage of complete values and of each missing reason). Raises `TypeError` when
+    `missing` is not a dict of `MissingSpec` objects and `ValueError` for an unknown column."""
+    if not isinstance(missing, dict):
+        raise TypeError(
+            f"`missing=` must be a dict mapping column names to MissingSpec objects, "
+            f"got {type(missing).__name__}."
+        )
+    for col_name, spec in missing.items():
+        if not isinstance(spec, MissingSpec):
+            raise TypeError(
+                f"`missing[{col_name!r}]` must be a MissingSpec, got {type(spec).__name__}."
+            )
+
+    nw_frame = nw.from_native(data)
+    is_lazy = isinstance(nw_frame, nw.LazyFrame)
+
+    available_columns = list(nw_frame.columns)
+
+    # Build the ordered union of reason labels across all specs (first-seen order)
+    reason_order: list[str] = []
+    for spec in missing.values():
+        for r in spec.reasons_list():
+            if r not in reason_order:
+                reason_order.append(r)
+
+    records: list[dict[str, Any]] = []
+    for column, spec in missing.items():
+        if column not in available_columns:
+            raise ValueError(f"Column '{column}' given in `missing=` was not found in the table.")
+
+        # Build one aggregation per reason that has an expression (sentinels and/or nulls)
+        select_exprs: dict[str, Any] = {"__total__": nw.len()}
+        reason_alias: dict[str, str] = {}
+        for i, r in enumerate(spec.reasons_list()):
+            sentinels = spec.values_for_reason(r)
+            expr = None
+            if sentinels:
+                expr = nw.col(column).is_in(sentinels)
+            if r == spec.null_reason and spec.null_is_missing:
+                null_expr = nw.col(column).is_null()
+                expr = null_expr if expr is None else (expr | null_expr)
+            if expr is not None:
+                alias = f"__r{i}__"
+                reason_alias[r] = alias
+                select_exprs[alias] = expr.cast(nw.Int32).sum()
+
+        out = nw_frame.select(**select_exprs)
+        if is_lazy:
+            out = out.collect()
+
+        total = int(out["__total__"][0])
+        counts: dict[str, int] = {}
+        for r in spec.reasons_list():
+            counts[r] = int(out[reason_alias[r]][0]) if r in reason_alias else 0
+
+        total_missing = sum(counts.values())
+        complete = total - total_missing
+
+        def _fmt(count: int) -> str:
+            pct = round(100 * count / total) if total > 0 else 0
+            return f"{count} ({pct}%)"
+
+        record: dict[str, Any] = {
+            "columns": column,
+            "total_n": str(total),
+            "complete": _fmt(complete),
+        }
+        # Fill every reason column in the union (0 for reasons this spec doesn't define)
+        for r in reason_order:
+            record[r] = _fmt(counts.get(r, 0))
+        records.append(record)
+
+    # Build a DataFrame from the records using the available DataFrame library
+    df_lib_gt = _select_df_lib(preference="polars")
+    if df_lib_gt.__name__ == "polars":
+        import polars as pl
+
+        breakdown_df = pl.DataFrame(records)
+    else:
+        import pandas as pd
+
+        breakdown_df = pd.DataFrame(records)
+
+    title = "Missing Values by Reason"
+    subtitle = "Counts and percentages of complete values and each missing reason, per column."
+
+    cols_labels = {
+        "columns": "Column",
+        "total_n": "Total N",
+        "complete": "Complete",
+    }
+    for r in reason_order:
+        cols_labels[r] = _prettify_reason_label(r)
+
+    value_columns = ["total_n", "complete"] + reason_order
+
+    gt_tbl = (
+        GT(breakdown_df)
+        .tab_header(title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle)
+        .opt_table_font(font=google_font(name="IBM Plex Sans"))
+        .opt_align_table_header(align="left")
+        .cols_label(cases=cols_labels)
+        .cols_align(align="right", columns=value_columns)
+        .cols_align(align="left", columns="columns")
+        .tab_style(
+            style=style.text(font=google_font(name="IBM Plex Mono"), size="12px"),
+            locations=loc.body(columns=value_columns),
+        )
+        .tab_style(
+            style=style.text(weight="bold"),
+            locations=loc.body(columns="columns"),
+        )
+    )
+
+    # Compare the version numerically: as plain strings, "0.9.0" >= "0.17.0" evaluates to True
+    if tuple(int(p) for p in version("great_tables").split(".")[:2] if p.isdigit()) >= (0, 17):
+        gt_tbl = gt_tbl.tab_options(quarto_disable_processing=True)
+
+    return gt_tbl
+
+
+def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -> GT:
     """
     Display a table that shows the missing values in the input table.
 
@@ -2685,12 +2815,23 @@ def missing_vals_tbl(data: Any) -> GT:
     table. The table is displayed using the Great Tables API, which allows for further customization
     of the table's appearance if so desired.
 
+    By default, missingness is treated as binary (a value is either Null or it isn't) and the
+    function renders a sector-based heatmap of the proportion of Null values across the rows of each
+    column. When a `missing=` mapping of columns to [`MissingSpec`](`pointblank.MissingSpec`) objects
+    is supplied, the function instead renders a *structured missingness* breakdown: one row per
+    column with the count and percentage of complete values and of each missing *reason* (e.g.,
+    "Refused", "Not Asked", "Unknown").
+
     Parameters
     ----------
     data
         The table for which to display the missing values. This could be a DataFrame object, an
         Ibis table object, a CSV file path, a Parquet file path, or a database connection string.
         Read the *Supported Input Table Types* section for details on the supported table types.
+    missing
+        An optional dictionary mapping column names to [`MissingSpec`](`pointblank.MissingSpec`)
+        objects. When provided, the function renders a structured breakdown of missingness by
+        reason for the specified columns (rather than the default sector heatmap).
 
     Returns
     -------
@@ -2768,6 +2909,12 @@ def missing_vals_tbl(data: Any) -> GT:
     if "pyspark" not in tbl_type:
         data = copy.deepcopy(data)
 
+    # If a `missing=` spec mapping is provided, render the structured missingness breakdown
+    # (count and percentage of complete values and each missing reason, per column) instead of
+    # the default sector heatmap
+    if missing is not None:
+        return _build_structured_missing_tbl(data=data, missing=missing)
+
     # Get the number of rows in the table
     n_rows = get_row_count(data)
 
@@ -3818,6 +3965,7 @@ def from_agg_validator(
     values: Any | list[Any] | tuple | None = None
     inclusive: tuple[bool, bool] | None = None
     na_pass: bool | None = None
+    missing: Any | None = None
     pre: Callable | None = None
     segments: Any | None = None
     thresholds: Thresholds | None = None
@@ -5211,6 +5359,7 @@ def col_vals_gt(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -5483,6 +5632,7 @@ def col_vals_gt(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -5500,6 +5650,7 @@ def col_vals_lt(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -5779,6 +5930,7 @@ def col_vals_lt(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -5796,6 +5948,7 @@ def col_vals_eq(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -6075,6 +6228,7 @@ def col_vals_eq(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -6092,6 +6246,7 @@ def col_vals_ne(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -6369,6 +6524,7 @@ def col_vals_ne(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -6386,6 +6542,7 @@ def col_vals_ge(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -6666,6 +6823,7 @@ def col_vals_ge(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -6683,6 +6841,7 @@ def col_vals_le(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -6963,6 +7122,7 @@ def col_vals_le(
                 column=column,
                 values=value,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -6982,6 +7142,7 @@ def col_vals_between(
         right: float | int | Column,
         inclusive: tuple[bool, bool] = (True, True),
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -7288,6 +7449,7 @@ def col_vals_between(
                 values=value,
                 inclusive=inclusive,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -7307,6 +7469,7 @@ def col_vals_outside(
         right: float | int | Column,
         inclusive: tuple[bool, bool] = (True, True),
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -7613,6 +7776,7 @@ def col_vals_outside(
                 values=value,
                 inclusive=inclusive,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -7629,6 +7793,7 @@ def col_vals_in_set(
         self,
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         set: Collection[Any],
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -7935,6 +8100,7 @@ class Color(Enum):
                 assertion_type=assertion_type,
                 column=column,
                 values=set,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -7951,6 +8117,7 @@ def col_vals_not_in_set(
         self,
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         set: Collection[Any],
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -8229,6 +8396,7 @@ class InvalidStatus(Enum):
                 assertion_type=assertion_type,
                 column=column,
                 values=set,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -8247,6 +8415,7 @@ def col_vals_increasing(
         allow_stationary: bool = False,
         decreasing_tol: float | None = None,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -8425,6 +8594,7 @@ def col_vals_increasing(
                 column=column,
                 values="",
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -8447,6 +8617,7 @@ def col_vals_decreasing(
         allow_stationary: bool = False,
         increasing_tol: float | None = None,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -8625,6 +8796,7 @@ def col_vals_decreasing(
                 column=column,
                 values="",
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -9143,6 +9315,7 @@ def col_vals_regex(
         pattern: str,
         na_pass: bool = False,
         inverse: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -9394,6 +9567,7 @@ def col_vals_regex(
                 column=column,
                 values=values,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -9411,6 +9585,7 @@ def col_vals_within_spec(
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         spec: str,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -9684,6 +9859,7 @@ def col_vals_within_spec(
                 column=column,
                 values=values,
                 na_pass=na_pass,
+                missing=missing,
                 pre=pre,
                 segments=segments,
                 thresholds=thresholds,
@@ -14199,6 +14375,14 @@ def interrogate(
                                 tbl=tbl, column=column, values=value, na_pass=na_pass
                             )
 
+                        # Apply structured-missingness exclusion: any row whose value is a
+                        # declared sentinel (or a null when `null_is_missing=True`) is treated
+                        # as a passing test unit, so only the "real" values are validated
+                        if validation.missing is not None and results_tbl is not None:
+                            results_tbl = apply_missing_exclusion(
+                                results_tbl=results_tbl, column=column, spec=validation.missing
+                            )
+
                     elif assertion_type == "col_pct_null":
                         result_bool = col_pct_null(
                             data_tbl=data_tbl_step,

From 4fedb0e1e198c4d1c96752ead0dabcc3ec065079 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:39:45 -0400
Subject: [PATCH 19/55] Add the apply_missing_exclusion() util function

---
 pointblank/_interrogation.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/pointblank/_interrogation.py b/pointblank/_interrogation.py
index 8485ef96e..2e1e8e8af 100644
--- a/pointblank/_interrogation.py
+++ b/pointblank/_interrogation.py
@@ -2580,6 +2580,36 @@ def interrogate_not_null(tbl: IntoFrame, column: str) -> Any:
     return result_tbl.to_native()
 
 
+def apply_missing_exclusion(results_tbl: IntoFrame, column: str, spec: Any) -> Any:
+    """Force `pb_is_good_` to `True` on rows that hold structured-missing values.
+
+    `results_tbl` must already contain the boolean `pb_is_good_` column. A row counts as
+    structured-missing when its value in `column` is one of `spec.sentinel_values()`, or is null
+    while `spec.null_is_missing` is truthy (`spec` is expected to be a `MissingSpec`). This is how
+    `missing=` on the `col_vals_*` methods excludes such rows: they are marked as passing. If the
+    spec yields neither condition, `results_tbl` is returned as received.
+    """
+    target = nw.col(column)
+    sentinels = spec.sentinel_values()
+    conditions = []
+
+    # `is_in()` gives null for null inputs, and `False | null` is null under Kleene logic, so an
+    # unfilled sentinel test would turn failing rows null; `fill_null(False)` prevents that.
+    if sentinels:
+        conditions.append(target.is_in(sentinels).fill_null(False))
+    if spec.null_is_missing:
+        conditions.append(target.is_null())
+    if not conditions:
+        return results_tbl
+
+    exclusion = conditions[0]
+    for extra in conditions[1:]:
+        exclusion = exclusion | extra
+    frame = nw.from_native(results_tbl)
+    assert isinstance(frame, (nw.DataFrame, nw.LazyFrame))
+    return frame.with_columns(pb_is_good_=(nw.col("pb_is_good_") | exclusion)).to_native()
+
+
 def interrogate_missing_coded(tbl: IntoFrame, column: str) -> Any:
     """Missing-coded interrogation.
 

From 9ccaa5914a78d6b89a0d6bbeb40535b887aa3ea4 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:39:47 -0400
Subject: [PATCH 20/55] Update validate.pyi

---
 pointblank/validate.pyi | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/pointblank/validate.pyi b/pointblank/validate.pyi
index 47124b150..359686cf2 100644
--- a/pointblank/validate.pyi
+++ b/pointblank/validate.pyi
@@ -78,7 +78,7 @@ def preview(
     min_tbl_width: int = 500,
     incl_header: bool | None = None,
 ) -> GT: ...
-def missing_vals_tbl(data: Any) -> GT: ...
+def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -> GT: ...
 def get_column_count(data: Any) -> int: ...
 def get_row_count(data: Any) -> int: ...
 @dataclass
@@ -179,6 +179,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -191,6 +192,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -203,6 +205,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -215,6 +218,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -227,6 +231,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -239,6 +244,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         value: float | int | Column,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -253,6 +259,7 @@ class Validate:
         right: float | int | Column,
         inclusive: tuple[bool, bool] = (True, True),
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -267,6 +274,7 @@ class Validate:
         right: float | int | Column,
         inclusive: tuple[bool, bool] = (True, True),
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -278,6 +286,7 @@ class Validate:
         self,
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         set: Collection[Any],
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -289,6 +298,7 @@ class Validate:
         self,
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         set: Collection[Any],
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -302,6 +312,7 @@ class Validate:
         allow_stationary: bool = False,
         decreasing_tol: float | None = None,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -315,6 +326,7 @@ class Validate:
         allow_stationary: bool = False,
         increasing_tol: float | None = None,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -348,6 +360,7 @@ class Validate:
         pattern: str,
         na_pass: bool = False,
         inverse: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
@@ -360,6 +373,7 @@ class Validate:
         columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
         spec: str,
         na_pass: bool = False,
+        missing: MissingSpec | None = None,
         pre: Callable | None = None,
         segments: SegmentSpec | None = None,
         thresholds: int | float | bool | tuple | dict | Thresholds | None = None,

From b9aaf1f938b3c1c627fb7f1713005d8168c64cbc Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:39:54 -0400
Subject: [PATCH 21/55] Update yaml.py

---
 pointblank/yaml.py | 96 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 93 insertions(+), 3 deletions(-)

diff --git a/pointblank/yaml.py b/pointblank/yaml.py
index 4bdd67cd2..455d6744f 100644
--- a/pointblank/yaml.py
+++ b/pointblank/yaml.py
@@ -8,9 +8,39 @@
 
 from pointblank._agg import is_valid_agg
 from pointblank._utils import _is_lib_present
+from pointblank.missing import MissingSpec
 from pointblank.thresholds import Actions
 from pointblank.validate import Validate, load_dataset
 
+
+def _missing_spec_from_dict(spec_def: dict) -> MissingSpec:
+    """Construct a `MissingSpec` from the mapping form used in YAML; absent keys get defaults."""
+    if isinstance(spec_def, dict):
+        return MissingSpec(
+            reasons=spec_def.get("reasons", {}),
+            categories=spec_def.get("categories"),
+            null_is_missing=spec_def.get("null_is_missing", True),
+            null_reason=spec_def.get("null_reason", "unknown"),
+            description=spec_def.get("description"),
+        )
+    raise YAMLValidationError(
+        f"A missing spec must be a mapping, got {type(spec_def).__name__}."
+    )
+
+
+def _missing_spec_to_code(spec: MissingSpec) -> str:
+    """Render `spec` as `pb.MissingSpec(...)` source text, omitting optional args at defaults."""
+    rendered = [f"reasons={spec.reasons!r}"]
+    optional_args = (
+        ("categories", spec.categories, spec.categories is not None),
+        ("null_is_missing", spec.null_is_missing, spec.null_is_missing is not True),
+        ("null_reason", spec.null_reason, spec.null_reason != "unknown"),
+        ("description", spec.description, spec.description is not None),
+    )
+    rendered.extend(f"{name}={val!r}" for name, val, keep in optional_args if keep)
+    arg_list = ", ".join(rendered)
+    return f"pb.MissingSpec({arg_list})"
+
 if TYPE_CHECKING:
     from typing import Literal
 
@@ -243,6 +273,8 @@ class YAMLValidator:
         "col_vals_decreasing": "col_vals_decreasing",
         "col_vals_within_spec": "col_vals_within_spec",
         "col_pct_null": "col_pct_null",
+        "col_pct_missing": "col_pct_missing",
+        "col_missing_coded": "col_missing_coded",
         "rows_distinct": "rows_distinct",
         "rows_complete": "rows_complete",
         "col_count_match": "col_count_match",
@@ -332,6 +364,7 @@ def _validate_schema(self, config: dict) -> None:
             "steps",
             "tbl_name",
             "label",
+            "missing_specs",
             "thresholds",
             "actions",
             "final_actions",
@@ -608,10 +641,45 @@ def _parse_schema_spec(self, schema_spec: Any) -> Any:
                 f"Schema specification must be a dictionary, got: {type(schema_spec)}"
             )
 
+    def _parse_missing_specs(self, config: dict) -> dict[str, MissingSpec]:
+        """Convert the top-level `missing_specs` block of `config` into `MissingSpec`s by name."""
+        declared = config.get("missing_specs")
+        if declared is None:
+            return {}
+        if isinstance(declared, dict):
+            return {label: _missing_spec_from_dict(body) for label, body in declared.items()}
+        raise YAMLValidationError("'missing_specs' must be a dictionary of named specs")
+
+    def _resolve_missing(
+        self, value: Any, missing_specs: Optional[dict[str, MissingSpec]]
+    ) -> MissingSpec:
+        """Turn the `missing=` value of a step into a `MissingSpec`.
+
+        Three forms are accepted: a `MissingSpec` instance (returned as is), a string naming an
+        entry of the top-level `missing_specs` block, or a mapping that defines a spec inline.
+        """
+        if isinstance(value, MissingSpec):
+            return value
+        if isinstance(value, dict):
+            return _missing_spec_from_dict(value)
+        if not isinstance(value, str):
+            raise YAMLValidationError(
+                f"Invalid 'missing' value: {value!r}. Use a named reference, an inline mapping, "
+                "or a MissingSpec."
+            )
+        if missing_specs and value in missing_specs:
+            return missing_specs[value]
+        available = sorted(missing_specs.keys()) if missing_specs else []
+        raise YAMLValidationError(
+            f"Unknown missing spec '{value}'. Define it under the top-level "
+            f"'missing_specs' block. Available: {available}"
+        )
+
     def _parse_validation_step(
         self,
         step_config: Union[str, dict],
         namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None,
+        missing_specs: Optional[dict[str, MissingSpec]] = None,
     ) -> tuple[str, dict]:
         """Parse a single validation step from YAML configuration.
 
@@ -676,6 +744,10 @@ def _parse_validation_step(
             # (e.g., `active: pb.has_columns("col_a")` or `active: false`)
             elif key == "active" and isinstance(value, str):
                 processed_parameters[key] = _safe_eval_python_code(value, namespaces=namespaces)
+            elif key == "missing":
+                # Pass the raw value through (a spec name, inline mapping, or MissingSpec); it is
+                # resolved to a MissingSpec below, after the loop
+                processed_parameters[key] = value
             else:
                 # Normal processing (requires python: block syntax)
                 processed_parameters[key] = _process_python_expressions(
@@ -683,6 +755,11 @@ def _parse_validation_step(
                 )
         parameters = processed_parameters
 
+        # Resolve a `missing=` parameter (used by col_pct_missing, col_missing_coded, and the
+        # col_vals_* methods) into a MissingSpec, looking up named references in `missing_specs`
+        if "missing" in parameters:
+            parameters["missing"] = self._resolve_missing(parameters["missing"], missing_specs)
+
         # Convert `columns=` specification
         if "columns" in parameters:
             parameters["columns"] = self._parse_column_spec(parameters["columns"])
@@ -832,10 +909,13 @@ def build_validation(
 
         validation = Validate(data, **validate_kwargs)
 
+        # Parse any named missing specs declared at the top level
+        missing_specs = self._parse_missing_specs(config)
+
         # Add validation steps
         for step_config in config["steps"]:
             method_name, parameters = self._parse_validation_step(
-                step_config, namespaces=namespaces
+                step_config, namespaces=namespaces, missing_specs=missing_specs
             )
 
             # Get the method from the validation object
@@ -1644,6 +1724,9 @@ def extract_python_expressions(obj, path=""):
     validator = YAMLValidator()
     config = validator.load_config(yaml)
 
+    # Parse any named missing specs so steps referencing them can be rendered
+    missing_specs = validator._parse_missing_specs(config)
+
     # Start building the Python code
     code_lines = []
 
@@ -1780,7 +1863,9 @@ def extract_python_expressions(obj, path=""):
 
         # Handle string steps (parameterless methods like "rows_distinct")
         if isinstance(step_config, str):
-            method_name, parameters = validator._parse_validation_step(step_config, namespaces=None)
+            method_name, parameters = validator._parse_validation_step(
+                step_config, namespaces=None, missing_specs=missing_specs
+            )
             code_lines.append(f"    .{method_name}()")
             continue
 
@@ -1802,7 +1887,9 @@ def extract_python_expressions(obj, path=""):
             elif isinstance(step_params["expr"], str):
                 original_expressions["expr"] = step_params["expr"]
 
-        method_name, parameters = validator._parse_validation_step(step_config, namespaces=None)
+        method_name, parameters = validator._parse_validation_step(
+            step_config, namespaces=None, missing_specs=missing_specs
+        )
 
         # Apply the original expressions to override the converted lambda functions
         if method_name == "conjointly" and "expressions" in original_expressions:
@@ -1852,6 +1939,9 @@ def extract_python_expressions(obj, path=""):
                         param_parts.append(f"{key}={columns_str}")
                 else:
                     param_parts.append(f'{key}="{value}"')  # pragma: no cover
+            elif key == "missing" and isinstance(value, MissingSpec):
+                # Render a resolved MissingSpec as a `pb.MissingSpec(...)` constructor call
+                param_parts.append(f"missing={_missing_spec_to_code(value)}")
             elif key == "brief":
                 # Handle `brief=` parameter: can be a boolean or a string
                 if isinstance(value, bool):

From b1ab501690f07bedf6e8e8f89c6711e33484de59 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:39:59 -0400
Subject: [PATCH 22/55] Create test_col_vals_missing_param.py

---
 tests/test_col_vals_missing_param.py | 94 ++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 tests/test_col_vals_missing_param.py

diff --git a/tests/test_col_vals_missing_param.py b/tests/test_col_vals_missing_param.py
new file mode 100644
index 000000000..ea86b4dde
--- /dev/null
+++ b/tests/test_col_vals_missing_param.py
@@ -0,0 +1,94 @@
+import polars as pl
+import pandas as pd
+import pytest
+
+import pointblank as pb
+
+
+@pytest.fixture
+def spec():
+    return pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+
+@pytest.fixture
+def spec_no_null():
+    return pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"}, null_is_missing=False)
+
+
+def _info(v):
+    return v.validation_info[0]
+
+
+class TestMissingExclusion:
+    def test_between_excludes_sentinels_and_nulls(self, spec):
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99, 29, 200, 55, None]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_between(columns="age", left=0, right=120, missing=spec)
+            .interrogate()
+        )
+        info = _info(v)
+        assert info.n == 8
+        # only 200 is a real out-of-range value
+        assert info.n_failed == 1
+
+    def test_gt_excludes(self, spec):
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99, 29, 55]})
+        v = pb.Validate(data=tbl).col_vals_gt(columns="age", value=0, missing=spec).interrogate()
+        assert _info(v).n_failed == 0
+
+    def test_null_not_excluded_when_spec_says_so(self, spec_no_null):
+        # null_is_missing=False -> nulls are NOT excluded; with na_pass default False, null fails gt
+        tbl = pl.DataFrame({"age": [34, -98, None, 41]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_gt(columns="age", value=0, missing=spec_no_null)
+            .interrogate()
+        )
+        # -98 excluded (passes); null fails (na_pass False); reals pass -> 1 failure
+        assert _info(v).n_failed == 1
+
+    def test_in_set_excludes_sentinels(self, spec):
+        tbl = pl.DataFrame({"grade": [1, 2, -99, 3, -98, 9]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_in_set(columns="grade", set=[1, 2, 3], missing=spec)
+            .interrogate()
+        )
+        # 9 is the only real value not in the set
+        assert _info(v).n_failed == 1
+
+    def test_regex_excludes_string_sentinels(self):
+        spec = pb.MissingSpec(reasons={"N/A": "not_applicable", "REF": "refused"})
+        tbl = pl.DataFrame({"code": ["AB12", "N/A", "CD34", "REF", "bad code"]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_regex(columns="code", pattern=r"^[A-Z]{2}[0-9]{2}$", missing=spec)
+            .interrogate()
+        )
+        # "bad code" is the only real non-matching value
+        assert _info(v).n_failed == 1
+
+    def test_no_missing_param_unchanged(self):
+        tbl = pl.DataFrame({"age": [34, -98, 41]})
+        v = pb.Validate(data=tbl).col_vals_gt(columns="age", value=0).interrogate()
+        # -98 is a real value < 0 -> fails when missing= not used
+        assert _info(v).n_failed == 1
+
+    def test_pandas_backend(self, spec):
+        tbl = pd.DataFrame({"age": [34, -98, 41, -99, 200]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_between(columns="age", left=0, right=120, missing=spec)
+            .interrogate()
+        )
+        assert _info(v).n_failed == 1
+
+    def test_report_renders(self, spec):
+        tbl = pl.DataFrame({"age": [34, -98, 41, 200]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_vals_between(columns="age", left=0, right=120, missing=spec)
+            .interrogate()
+        )
+        assert v.get_tabular_report() is not None

From 7ef7d3503ae2a4710d5f0efe780ec867349d48c2 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:40:01 -0400
Subject: [PATCH 23/55] Create test_missing_vals_tbl_structured.py

---
 tests/test_missing_vals_tbl_structured.py | 86 +++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 tests/test_missing_vals_tbl_structured.py

diff --git a/tests/test_missing_vals_tbl_structured.py b/tests/test_missing_vals_tbl_structured.py
new file mode 100644
index 000000000..30c2f9907
--- /dev/null
+++ b/tests/test_missing_vals_tbl_structured.py
@@ -0,0 +1,86 @@
+import polars as pl
+import pandas as pd
+import pytest
+from great_tables import GT
+
+import pointblank as pb
+
+
+@pytest.fixture
+def tbl_pl():
+    return pl.DataFrame(
+        {
+            "age": [34, -98, 41, -99, 29, -98, 55, None],
+            "income": [50000, -99, -1, None, 42000, -99, 38000, 61000],
+        }
+    )
+
+
+@pytest.fixture
+def specs():
+    return {
+        "age": pb.MissingSpec(reasons={-99: "not_asked", -98: "refused", -97: "dont_know"}),
+        "income": pb.MissingSpec(reasons={-99: "not_asked", -1: "below_threshold"}),
+    }
+
+
+class TestStructuredMissingTbl:
+    def test_returns_gt(self, tbl_pl, specs):
+        result = pb.missing_vals_tbl(tbl_pl, missing=specs)
+        assert isinstance(result, GT)
+
+    def test_reason_columns_present(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs).as_raw_html()
+        for token in [
+            "Not Asked",
+            "Refused",
+            "Dont Know",
+            "Below Threshold",
+            "Unknown",
+            "Complete",
+            "Total N",
+        ]:
+            assert token in html
+
+    def test_counts_correct(self, tbl_pl):
+        # age: total 8 -> refused 2 (25%), not_asked 1 (12%), dont_know 0 (0%),
+        #      unknown/null 1 (12%), complete 4 (50%)
+        spec = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused", -97: "dont_know"})
+        html = pb.missing_vals_tbl(tbl_pl, missing={"age": spec}).as_raw_html()
+        assert "4 (50%)" in html  # complete
+        assert "2 (25%)" in html  # refused
+        assert "1 (12%)" in html  # not_asked / unknown
+        assert "0 (0%)" in html  # dont_know
+
+    def test_null_excluded_when_spec_says_so(self):
+        # null_is_missing=False -> the null is counted as complete, no Unknown column
+        tbl = pl.DataFrame({"age": [34, -98, 41, None]})
+        spec = pb.MissingSpec(reasons={-98: "refused"}, null_is_missing=False)
+        html = pb.missing_vals_tbl(tbl, missing={"age": spec}).as_raw_html()
+        assert "Unknown" not in html
+        # complete = 3 (null + 2 reals) of 4 = 75%
+        assert "3 (75%)" in html
+
+    def test_pandas_input(self, specs):
+        tbl = pd.DataFrame(
+            {
+                "age": [34, -98, 41, -99, 29, -98, 55, None],
+                "income": [50000, -99, -1, None, 42000, -99, 38000, 61000],
+            }
+        )
+        result = pb.missing_vals_tbl(tbl, missing=specs)
+        assert isinstance(result, GT)
+
+    def test_default_behavior_unchanged(self, tbl_pl):
+        # No missing= -> the original sector heatmap path
+        result = pb.missing_vals_tbl(tbl_pl)
+        assert isinstance(result, GT)
+
+    def test_missing_must_be_dict_of_specs(self, tbl_pl):
+        with pytest.raises(TypeError):
+            pb.missing_vals_tbl(tbl_pl, missing={"age": {-99: "x"}})
+
+    def test_unknown_column_raises(self, tbl_pl):
+        spec = pb.MissingSpec(reasons={-99: "not_asked"})
+        with pytest.raises(ValueError, match="not found"):
+            pb.missing_vals_tbl(tbl_pl, missing={"nonexistent": spec})

From 40b33006ce67396863232f7af6c5c78a8d1c8970 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:40:04 -0400
Subject: [PATCH 24/55] Update test_validate.py

---
 tests/test_validate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_validate.py b/tests/test_validate.py
index 64e8718d0..6ff03cdfa 100644
--- a/tests/test_validate.py
+++ b/tests/test_validate.py
@@ -833,6 +833,7 @@ def test_validation_plan_and_interrogation(request, tbl_fixture) -> None:
         "values",
         "inclusive",
         "na_pass",
+        "missing",
         "pre",
         "segments",
         "thresholds",
@@ -915,6 +916,7 @@ def test_validation_plan_and_interrogation(request, tbl_fixture) -> None:
         "values",
         "inclusive",
         "na_pass",
+        "missing",
         "pre",
         "segments",
         "thresholds",

From 274c2e32f10f2bd7847c770d1b2179771462a5aa Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 15:40:08 -0400
Subject: [PATCH 25/55] Create test_yaml_missing_specs.py

---
 tests/test_yaml_missing_specs.py | 127 +++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 tests/test_yaml_missing_specs.py

diff --git a/tests/test_yaml_missing_specs.py b/tests/test_yaml_missing_specs.py
new file mode 100644
index 000000000..9937bd939
--- /dev/null
+++ b/tests/test_yaml_missing_specs.py
@@ -0,0 +1,127 @@
+import polars as pl
+import pytest
+
+import pointblank as pb
+from pointblank.yaml import YAMLValidationError, yaml_interrogate, yaml_to_python
+
+
+def _write_csv(tmp_path, df):
+    p = tmp_path / "survey.csv"
+    df.write_csv(p)
+    return str(p)
+
+
+@pytest.fixture
+def survey_csv(tmp_path):
+    df = pl.DataFrame({"age": [34, -98, 41, -99, 29, -98, 55, 38]})
+    return _write_csv(tmp_path, df)
+
+
+def test_named_missing_spec_pct(survey_csv):
+    yaml_str = f"""
+tbl: {survey_csv}
+missing_specs:
+  standard_survey:
+    reasons:
+      -99: not_asked
+      -98: refused
+      -97: dont_know
+    categories:
+      nonresponse: [refused, dont_know]
+steps:
+  - col_pct_missing:
+      columns: age
+      missing: standard_survey
+      max_pct: 0.5
+  - col_pct_missing:
+      columns: age
+      missing: standard_survey
+      reason: refused
+      max_pct: 0.30
+"""
+    result = yaml_interrogate(yaml_str)
+    assert len(result.validation_info) == 2
+    # overall 3/8=0.375 <= 0.5 pass; refused 2/8=0.25 <= 0.30 pass
+    assert result.validation_info[0].all_passed is True
+    assert result.validation_info[1].all_passed is True
+
+
+def test_named_missing_spec_coded(tmp_path):
+    df = pl.DataFrame({"age": [34, -98, 41, None, 29, -99, 55, 38]})
+    csv = _write_csv(tmp_path, df)
+    yaml_str = f"""
+tbl: {csv}
+missing_specs:
+  survey:
+    reasons:
+      -99: not_asked
+      -98: refused
+steps:
+  - col_missing_coded:
+      columns: age
+      missing: survey
+"""
+    result = yaml_interrogate(yaml_str)
+    info = result.validation_info[0]
+    assert info.n_failed == 1  # one raw null
+
+
+def test_inline_missing_spec(survey_csv):
+    yaml_str = f"""
+tbl: {survey_csv}
+steps:
+  - col_pct_missing:
+      columns: age
+      missing:
+        reasons:
+          -99: not_asked
+          -98: refused
+      max_pct: 0.5
+"""
+    result = yaml_interrogate(yaml_str)
+    assert result.validation_info[0].all_passed is True
+
+
+def test_unknown_spec_reference_raises(survey_csv):
+    yaml_str = f"""
+tbl: {survey_csv}
+steps:
+  - col_pct_missing:
+      columns: age
+      missing: nonexistent
+      max_pct: 0.5
+"""
+    with pytest.raises(YAMLValidationError, match="Unknown missing spec"):
+        yaml_interrogate(yaml_str)
+
+
+def test_missing_specs_must_be_dict(survey_csv):
+    yaml_str = f"""
+tbl: {survey_csv}
+missing_specs:
+  - not_a_mapping
+steps:
+  - rows_distinct
+"""
+    with pytest.raises(YAMLValidationError):
+        yaml_interrogate(yaml_str)
+
+
+def test_yaml_to_python_renders_missing_spec(survey_csv):
+    yaml_str = f"""
+tbl: {survey_csv}
+missing_specs:
+  survey:
+    reasons:
+      -99: not_asked
+      -98: refused
+steps:
+  - col_pct_missing:
+      columns: age
+      missing: survey
+      max_pct: 0.5
+"""
+    code = yaml_to_python(yaml_str)
+    assert "pb.MissingSpec(" in code
+    assert "col_pct_missing" in code
+    assert "reasons=" in code

From a138223873dbeb2ba9dcb00697a720937b4a8ac1 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:10 -0400
Subject: [PATCH 26/55] Update _constants_translations.py

---
 pointblank/_constants_translations.py | 168 ++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)

diff --git a/pointblank/_constants_translations.py b/pointblank/_constants_translations.py
index 920d494e8..14f57c58a 100644
--- a/pointblank/_constants_translations.py
+++ b/pointblank/_constants_translations.py
@@ -1217,6 +1217,174 @@
         "th": "มีค่าที่หายไปที่ไม่ได้เข้ารหัส (ค่า null ดิบ) อยู่ใน {column_text}",
         "fa": "مقادیر مفقود کدگذاری‌نشده (مقادیر null خام) در {column_text} وجود داشت.",
     },
+    "col_missing_only_coded_expectation_text": {
+        "en": "Expect that {column_text} contains only documented missing codes and legitimate values.",
+        "fr": "On s'attend à ce que {column_text} ne contienne que des codes de valeurs manquantes documentés et des valeurs légitimes.",
+        "de": "Erwarten Sie, dass {column_text} nur dokumentierte fehlende Codes und legitime Werte enthält.",
+        "it": "Aspettatevi che {column_text} contenga solo codici mancanti documentati e valori legittimi.",
+        "es": "Se espera que {column_text} contenga solo códigos de valores faltantes documentados y valores legítimos.",
+        "pt": "Espera-se que {column_text} contenha apenas códigos de valores ausentes documentados e valores legítimos.",
+        "ro": "Se așteaptă ca {column_text} să conțină doar coduri de valori lipsă documentate și valori legitime.",
+        "tr": "{column_text} öğesinin yalnızca belgelenmiş eksik kodları ve geçerli değerleri içermesini bekleyin.",
+        "zh-Hans": "预期{column_text}仅包含已记录的缺失值代码和合法值。",
+        "zh-Hant": "{column_text}應僅包含已記錄的缺失值代碼和合法值。",
+        "ja": "{column_text}に文書化された欠損コードと正当な値のみが含まれていることを期待します。",
+        "ko": "{column_text}에 문서화된 결측 코드와 정당한 값만 포함되어 있어야 합니다.",
+        "vi": "Kỳ vọng {column_text} chỉ chứa các mã thiếu đã được ghi nhận và các giá trị hợp lệ.",
+        "ru": "Ожидается, что {column_text} содержит только задокументированные коды отсутствия и допустимые значения.",
+        "cs": "Očekává se, že {column_text} obsahuje pouze zdokumentované chybějící kódy a legitimní hodnoty.",
+        "pl": "Oczekuje się, że {column_text} zawiera tylko udokumentowane kody braków i prawidłowe wartości.",
+        "da": "Forvent, at {column_text} kun indeholder dokumenterede manglende koder og legitime værdier.",
+        "sv": "Förvänta dig att {column_text} endast innehåller dokumenterade saknade koder och legitima värden.",
+        "nb": "Forvent at {column_text} bare inneholder dokumenterte manglende koder og legitime verdier.",
+        "nl": "Verwacht dat {column_text} alleen gedocumenteerde ontbrekende codes en legitieme waarden bevat.",
+        "fi": "Odota, että {column_text} sisältää vain dokumentoituja puuttuvien arvojen koodeja ja kelvollisia arvoja.",
+        "is": "Væntir þess að {column_text} innihaldi aðeins skráða vantandi kóða og lögmæt gildi.",
+        "ar": "توقع أن يحتوي {column_text} على رموز القيم المفقودة الموثقة والقيم المشروعة فقط.",
+        "hi": "अपेक्षा है कि {column_text} में केवल प्रलेखित अनुपस्थित कोड और वैध मान हों।",
+        "el": "Αναμένεται η στήλη {column_text} να περιέχει μόνο τεκμηριωμένους κωδικούς ελλιπών τιμών και έγκυρες τιμές.",
+        "id": "Mengharapkan bahwa {column_text} hanya berisi kode nilai yang hilang yang terdokumentasi dan nilai yang sah.",
+        "uk": "Очікується, що {column_text} містить лише задокументовані коди відсутності та допустимі значення.",
+        "bg": "Очаква се {column_text} да съдържа само документирани кодове за липсващи стойности и легитимни стойности.",
+        "hr": "Očekuje se da {column_text} sadrži samo dokumentirane kodove za nedostajuće vrijednosti i legitimne vrijednosti.",
+        "et": "Eeldatakse, et {column_text} sisaldab ainult dokumenteeritud puuduvate väärtuste koode ja õiguspäraseid väärtusi.",
+        "hu": "Elvárás, hogy a {column_text} csak dokumentált hiányzó kódokat és érvényes értékeket tartalmazzon.",
+        "ga": "Táthar ag súil nach mbeadh i {column_text} ach cóid ar iarraidh dhoiciméadaithe agus luachanna dlisteanacha.",
+        "lv": "Tiek sagaidīts, ka {column_text} satur tikai dokumentētus trūkstošo vērtību kodus un likumīgas vērtības.",
+        "lt": "Tikimasi, kad {column_text} yra tik dokumentuoti trūkstamų reikšmių kodai ir teisėtos reikšmės.",
+        "mt": "Mistenni li {column_text} ikun fih biss kodiċijiet ta' valuri nieqsa dokumentati u valuri leġittimi.",
+        "sk": "Očakáva sa, že {column_text} obsahuje iba zdokumentované chýbajúce kódy a legitímne hodnoty.",
+        "sl": "Pričakuje se, da {column_text} vsebuje samo dokumentirane kode manjkajočih vrednosti in legitimne vrednosti.",
+        "he": "צפוי ש{column_text} יכיל רק קודי ערכים חסרים מתועדים וערכים לגיטימיים.",
+        "th": "คาดหวังว่า {column_text} จะมีเฉพาะรหัสค่าที่หายไปที่มีการบันทึกไว้และค่าที่ถูกต้องเท่านั้น",
+        "fa": "انتظار می‌رود که {column_text} فقط شامل کدهای مفقود مستندشده و مقادیر معتبر باشد.",
+    },
+    "col_missing_only_coded_failure_text": {
+        "en": "Undocumented codes were present in {column_text}.",
+        "fr": "Des codes non documentés étaient présents dans {column_text}.",
+        "de": "Undokumentierte Codes waren in {column_text} vorhanden.",
+        "it": "Erano presenti codici non documentati in {column_text}.",
+        "es": "Había códigos no documentados en {column_text}.",
+        "pt": "Havia códigos não documentados em {column_text}.",
+        "ro": "Coduri nedocumentate au fost prezente în {column_text}.",
+        "tr": "{column_text} içinde belgelenmemiş kodlar mevcuttu.",
+        "zh-Hans": "{column_text}中存在未记录的代码。",
+        "zh-Hant": "{column_text}中存在未記錄的代碼。",
+        "ja": "{column_text}に文書化されていないコードが存在しました。",
+        "ko": "{column_text}에 문서화되지 않은 코드가 있었습니다.",
+        "vi": "Có các mã chưa được ghi nhận trong {column_text}.",
+        "ru": "В {column_text} присутствовали незадокументированные коды.",
+        "cs": "Ve sloupci {column_text} byly přítomny nezdokumentované kódy.",
+        "pl": "W {column_text} obecne były nieudokumentowane kody.",
+        "da": "Udokumenterede koder var til stede i {column_text}.",
+        "sv": "Odokumenterade koder fanns i {column_text}.",
+        "nb": "Udokumenterte koder var til stede i {column_text}.",
+        "nl": "Er waren ongedocumenteerde codes aanwezig in {column_text}.",
+        "fi": "Sarakkeessa {column_text} oli dokumentoimattomia koodeja.",
+        "is": "Óskráðir kóðar voru til staðar í {column_text}.",
+        "ar": "كانت هناك رموز غير موثقة في {column_text}.",
+        "hi": "{column_text} में बिना प्रलेखित कोड मौजूद थे।",
+        "el": "Υπήρχαν μη τεκμηριωμένοι κωδικοί στη στήλη {column_text}.",
+        "id": "Terdapat kode yang tidak terdokumentasi dalam {column_text}.",
+        "uk": "У {column_text} були наявні незадокументовані коди.",
+        "bg": "В {column_text} присъстваха недокументирани кодове.",
+        "hr": "U {column_text} bili su prisutni nedokumentirani kodovi.",
+        "et": "Veerus {column_text} esines dokumenteerimata koode.",
+        "hu": "A {column_text} oszlopban dokumentálatlan kódok voltak jelen.",
+        "ga": "Bhí cóid neamhdhoiciméadaithe i láthair i {column_text}.",
+        "lv": "{column_text} bija nedokumentēti kodi.",
+        "lt": "{column_text} buvo nedokumentuotų kodų.",
+        "mt": "Kien hemm kodiċijiet mhux dokumentati f'{column_text}.",
+        "sk": "V {column_text} sa vyskytli nezdokumentované kódy.",
+        "sl": "V {column_text} so bili prisotni nedokumentirani kodi.",
+        "he": "היו קודים לא מתועדים ב{column_text}.",
+        "th": "พบรหัสที่ไม่มีการบันทึกไว้ใน {column_text}",
+        "fa": "کدهای مستندنشده در {column_text} وجود داشت.",
+    },
+    "col_missing_consistent_expectation_text": {
+        "en": "Expect consistent missingness for reason {reason} across columns {columns_text}.",
+        "fr": "On s'attend à une absence cohérente pour la raison {reason} dans les colonnes {columns_text}.",
+        "de": "Erwarten Sie eine konsistente Fehlendheit für den Grund {reason} über die Spalten {columns_text} hinweg.",
+        "it": "Aspettatevi una mancanza coerente per il motivo {reason} tra le colonne {columns_text}.",
+        "es": "Se espera una ausencia coherente por el motivo {reason} en las columnas {columns_text}.",
+        "pt": "Espera-se uma ausência consistente pelo motivo {reason} nas colunas {columns_text}.",
+        "ro": "Se așteaptă o lipsă consecventă pentru motivul {reason} în coloanele {columns_text}.",
+        "tr": "{columns_text} sütunlarında {reason} nedeniyle tutarlı eksiklik bekleyin.",
+        "zh-Hans": "预期各列 {columns_text} 中因 {reason} 导致的缺失情况一致。",
+        "zh-Hant": "預期各欄 {columns_text} 中因 {reason} 導致的缺失情況一致。",
+        "ja": "列 {columns_text} 全体で理由 {reason} による欠損が一貫していることを期待します。",
+        "ko": "{columns_text} 열 전체에서 사유 {reason}에 대한 일관된 결측을 기대합니다.",
+        "vi": "Kỳ vọng sự thiếu hụt nhất quán cho lý do {reason} trên các cột {columns_text}.",
+        "ru": "Ожидается согласованная пропущенность по причине {reason} в столбцах {columns_text}.",
+        "cs": "Očekává se konzistentní chybějící hodnoty z důvodu {reason} napříč sloupci {columns_text}.",
+        "pl": "Oczekuje się spójnego braku danych z powodu {reason} w kolumnach {columns_text}.",
+        "da": "Forvent konsistent manglende data af årsagen {reason} på tværs af kolonnerne {columns_text}.",
+        "sv": "Förvänta dig konsekvent saknad data av orsaken {reason} över kolumnerna {columns_text}.",
+        "nb": "Forvent konsistent manglende data av årsaken {reason} på tvers av kolonnene {columns_text}.",
+        "nl": "Verwacht consistente ontbrekendheid om reden {reason} in de kolommen {columns_text}.",
+        "fi": "Odota johdonmukaista puuttuvuutta syystä {reason} sarakkeissa {columns_text}.",
+        "is": "Væntir samkvæmrar vöntunar af ástæðunni {reason} yfir dálkana {columns_text}.",
+        "ar": "توقع غيابًا متسقًا للسبب {reason} عبر الأعمدة {columns_text}.",
+        "hi": "अपेक्षा है कि कारण {reason} के लिए स्तंभों {columns_text} में सुसंगत अनुपस्थिति हो।",
+        "el": "Αναμένεται συνεπής έλλειψη για τον λόγο {reason} στις στήλες {columns_text}.",
+        "id": "Mengharapkan ketiadaan yang konsisten karena alasan {reason} di seluruh kolom {columns_text}.",
+        "uk": "Очікується узгоджена відсутність даних з причини {reason} у стовпцях {columns_text}.",
+        "bg": "Очаква се последователна липса по причина {reason} в колоните {columns_text}.",
+        "hr": "Očekuje se dosljedno nedostajanje za razlog {reason} u stupcima {columns_text}.",
+        "et": "Eeldatakse järjepidevat puudumist põhjusel {reason} veergudes {columns_text}.",
+        "hu": "Elvárás, hogy a {reason} okból következetes hiány legyen a(z) {columns_text} oszlopokban.",
+        "ga": "Táthar ag súil le heaspa chomhsheasmhach ar an gcúis {reason} ar fud na gcolún {columns_text}.",
+        "lv": "Tiek sagaidīts konsekvents trūkums iemesla {reason} dēļ kolonnās {columns_text}.",
+        "lt": "Tikimasi nuoseklaus trūkumo dėl priežasties {reason} stulpeliuose {columns_text}.",
+        "mt": "Mistenni nuqqas konsistenti għar-raġuni {reason} fil-kolonni {columns_text}.",
+        "sk": "Očakávajú sa konzistentné chýbajúce hodnoty z dôvodu {reason} v stĺpcoch {columns_text}.",
+        "sl": "Pričakuje se dosledno manjkanje zaradi razloga {reason} v stolpcih {columns_text}.",
+        "he": "צפוי חוסר עקבי מסיבה {reason} בעמודות {columns_text}.",
+        "th": "คาดหวังว่าการขาดหายไปด้วยเหตุผล {reason} จะสอดคล้องกันในคอลัมน์ {columns_text}",
+        "fa": "انتظار می‌رود فقدان سازگار به دلیل {reason} در ستون‌های {columns_text} وجود داشته باشد.",
+    },
+    "col_missing_consistent_failure_text": {
+        "en": "Inconsistent missingness for reason {reason} was found across columns {columns_text}.",
+        "fr": "Une absence incohérente pour la raison {reason} a été trouvée dans les colonnes {columns_text}.",
+        "de": "Inkonsistente Fehlendheit für den Grund {reason} wurde über die Spalten {columns_text} hinweg gefunden.",
+        "it": "È stata rilevata una mancanza incoerente per il motivo {reason} tra le colonne {columns_text}.",
+        "es": "Se encontró una ausencia incoherente por el motivo {reason} en las columnas {columns_text}.",
+        "pt": "Foi encontrada uma ausência inconsistente pelo motivo {reason} nas colunas {columns_text}.",
+        "ro": "A fost găsită o lipsă inconsecventă pentru motivul {reason} în coloanele {columns_text}.",
+        "tr": "{columns_text} sütunlarında {reason} nedeniyle tutarsız eksiklik bulundu.",
+        "zh-Hans": "在各列 {columns_text} 中发现因 {reason} 导致的缺失情况不一致。",
+        "zh-Hant": "在各欄 {columns_text} 中發現因 {reason} 導致的缺失情況不一致。",
+        "ja": "列 {columns_text} 全体で理由 {reason} による欠損が一貫していないことが見つかりました。",
+        "ko": "{columns_text} 열 전체에서 사유 {reason}에 대한 일관되지 않은 결측이 발견되었습니다.",
+        "vi": "Đã tìm thấy sự thiếu hụt không nhất quán cho lý do {reason} trên các cột {columns_text}.",
+        "ru": "В столбцах {columns_text} обнаружена несогласованная пропущенность по причине {reason}.",
+        "cs": "Napříč sloupci {columns_text} byly nalezeny nekonzistentní chybějící hodnoty z důvodu {reason}.",
+        "pl": "W kolumnach {columns_text} znaleziono niespójny brak danych z powodu {reason}.",
+        "da": "Inkonsistent manglende data af årsagen {reason} blev fundet på tværs af kolonnerne {columns_text}.",
+        "sv": "Inkonsekvent saknad data av orsaken {reason} hittades över kolumnerna {columns_text}.",
+        "nb": "Inkonsistent manglende data av årsaken {reason} ble funnet på tvers av kolonnene {columns_text}.",
+        "nl": "Inconsistente ontbrekendheid om reden {reason} werd aangetroffen in de kolommen {columns_text}.",
+        "fi": "Sarakkeissa {columns_text} havaittiin epäjohdonmukaista puuttuvuutta syystä {reason}.",
+        "is": "Ósamkvæm vöntun af ástæðunni {reason} fannst yfir dálkana {columns_text}.",
+        "ar": "تم العثور على غياب غير متسق للسبب {reason} عبر الأعمدة {columns_text}.",
+        "hi": "कारण {reason} के लिए स्तंभों {columns_text} में असंगत अनुपस्थिति पाई गई।",
+        "el": "Βρέθηκε ασυνεπής έλλειψη για τον λόγο {reason} στις στήλες {columns_text}.",
+        "id": "Ketiadaan yang tidak konsisten karena alasan {reason} ditemukan di seluruh kolom {columns_text}.",
+        "uk": "У стовпцях {columns_text} виявлено неузгоджену відсутність даних з причини {reason}.",
+        "bg": "Установена е непоследователна липса по причина {reason} в колоните {columns_text}.",
+        "hr": "Pronađeno je nedosljedno nedostajanje za razlog {reason} u stupcima {columns_text}.",
+        "et": "Veergudes {columns_text} leiti ebajärjepidev puudumine põhjusel {reason}.",
+        "hu": "A(z) {columns_text} oszlopokban következetlen hiány található a {reason} okból.",
+        "ga": "Fuarthas easpa neamhchomhsheasmhach ar an gcúis {reason} ar fud na gcolún {columns_text}.",
+        "lv": "Kolonnās {columns_text} tika atrasts nekonsekvents trūkums iemesla {reason} dēļ.",
+        "lt": "Stulpeliuose {columns_text} rastas nenuoseklus trūkumas dėl priežasties {reason}.",
+        "mt": "Instab nuqqas inkonsistenti għar-raġuni {reason} fil-kolonni {columns_text}.",
+        "sk": "V stĺpcoch {columns_text} sa našli nekonzistentné chýbajúce hodnoty z dôvodu {reason}.",
+        "sl": "V stolpcih {columns_text} je bilo najdeno nedosledno manjkanje zaradi razloga {reason}.",
+        "he": "נמצא חוסר לא עקבי מסיבה {reason} בעמודות {columns_text}.",
+        "th": "พบการขาดหายไปด้วยเหตุผล {reason} ที่ไม่สอดคล้องกันในคอลัมน์ {columns_text}",
+        "fa": "فقدان ناسازگار به دلیل {reason} در ستون‌های {columns_text} یافت شد.",
+    },
     "regex_expectation_text": {
         "en": "Expect that values in {column_text} should match the regular expression: {values_text}.",
         "fr": "On s'attend à ce que les valeurs de {column_text} correspondent à l'expression régulière : {values_text}.",

From afa7cc5d2d8f754c0b2017f849129f87366a4cc2 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:16 -0400
Subject: [PATCH 27/55] Update _constants.py

---
 pointblank/_constants.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/pointblank/_constants.py b/pointblank/_constants.py
index d898a7495..9b67872a2 100644
--- a/pointblank/_constants.py
+++ b/pointblank/_constants.py
@@ -22,12 +22,15 @@
     "null": ["str", "numeric", "bool", "datetime", "duration"],
     "not_null": ["str", "numeric", "bool", "datetime", "duration"],
     "missing_coded": ["str", "numeric", "bool", "datetime", "duration"],
+    "missing_only_coded": ["str", "numeric", "bool", "datetime", "duration"],
 }
 
 ASSERTION_TYPE_METHOD_MAP: dict[str, str] = {
     "col_pct_null": "pct_null",
     "col_pct_missing": "pct_missing",
     "col_missing_coded": "missing_coded",
+    "col_missing_only_coded": "missing_only_coded",
+    "col_missing_consistent": "missing_consistent",
     "col_vals_gt": "gt",
     "col_vals_lt": "lt",
     "col_vals_eq": "eq",
@@ -95,6 +98,7 @@
     "col_vals_null",
     "col_vals_not_null",
     "col_missing_coded",
+    "col_missing_only_coded",
     "col_vals_expr",
     "conjointly",
     "prompt",
@@ -678,6 +682,26 @@
             <path d="M40.6120805,47.037834 C37.4692348,47.037834 35.0126139,45.9348613 33.712234,44.0140597 C32.4118541,45.9348613 29.9552331,47.037834 26.8123883,47.037834 C22.6574397,47.037834 16.0646712,43.4437723 16.0646712,33.8021619 C16.0646712,29.3401361 17.4715879,18.962166 30.5035862,18.962166 C30.9454018,18.962166 31.3057481,19.3225124 31.3057481,19.7643279 L31.3057481,21.3686518 C31.3057481,21.8104674 30.9454018,22.1708138 30.5035862,22.1708138 C26.6400486,22.1708138 22.4819668,25.8118774 22.4819668,33.8021619 C22.4819668,37.5090277 23.7635456,43.0270243 27.2949384,43.0270243 C29.795428,43.0270243 31.224279,40.4231312 32.0985095,38.2861221 C30.5067194,35.6101596 29.7014243,33.1034035 29.7014243,30.8347892 C29.7014243,25.6238707 31.8603677,23.7751377 33.712234,23.7751377 C35.5641002,23.7751377 37.7230437,25.6238707 37.7230437,30.8347892 C37.7230437,33.1347383 36.9396828,35.5788255 35.3290916,38.2861221 C36.6294715,41.4321009 38.243196,43.0270243 40.1295295,43.0270243 C43.6609223,43.0270243 44.9425012,37.5090277 44.9425012,33.8021619 C44.9425012,25.8118774 40.7844193,22.1708138 36.9208817,22.1708138 C36.4759329,22.1708138 36.1187198,21.8104674 36.1187198,21.3686518 L36.1187198,19.7643279 C36.1187198,19.3225124 36.4759329,18.962166 36.9208817,18.962166 C49.9528801,18.962166 51.3597967,29.3401361 51.3597967,33.8021619 C51.3597967,43.4437723 44.7670282,47.037834 40.6120805,47.037834 Z" id="omega" fill="#000000" fill-rule="nonzero"></path>
         </g>
     </g>
+</svg>""",
+    "col_missing_only_coded": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>col_missing_only_coded</title>
+    <g id="Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="col_missing_only_coded" transform="translate(0.000000, 0.551724)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <path d="M40.6120805,47.037834 C37.4692348,47.037834 35.0126139,45.9348613 33.712234,44.0140597 C32.4118541,45.9348613 29.9552331,47.037834 26.8123883,47.037834 C22.6574397,47.037834 16.0646712,43.4437723 16.0646712,33.8021619 C16.0646712,29.3401361 17.4715879,18.962166 30.5035862,18.962166 C30.9454018,18.962166 31.3057481,19.3225124 31.3057481,19.7643279 L31.3057481,21.3686518 C31.3057481,21.8104674 30.9454018,22.1708138 30.5035862,22.1708138 C26.6400486,22.1708138 22.4819668,25.8118774 22.4819668,33.8021619 C22.4819668,37.5090277 23.7635456,43.0270243 27.2949384,43.0270243 C29.795428,43.0270243 31.224279,40.4231312 32.0985095,38.2861221 C30.5067194,35.6101596 29.7014243,33.1034035 29.7014243,30.8347892 C29.7014243,25.6238707 31.8603677,23.7751377 33.712234,23.7751377 C35.5641002,23.7751377 37.7230437,25.6238707 37.7230437,30.8347892 C37.7230437,33.1347383 36.9396828,35.5788255 35.3290916,38.2861221 C36.6294715,41.4321009 38.243196,43.0270243 40.1295295,43.0270243 C43.6609223,43.0270243 44.9425012,37.5090277 44.9425012,33.8021619 C44.9425012,25.8118774 40.7844193,22.1708138 36.9208817,22.1708138 C36.4759329,22.1708138 36.1187198,21.8104674 36.1187198,21.3686518 L36.1187198,19.7643279 C36.1187198,19.3225124 36.4759329,18.962166 36.9208817,18.962166 C49.9528801,18.962166 51.3597967,29.3401361 51.3597967,33.8021619 C51.3597967,43.4437723 44.7670282,47.037834 40.6120805,47.037834 Z" id="omega" fill="#000000" fill-rule="nonzero"></path>
+        </g>
+    </g>
+</svg>""",
+    "col_missing_consistent": """<?xml version="1.0" encoding="UTF-8"?>
+<svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <title>col_missing_consistent</title>
+    <g id="Icons" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
+        <g id="col_missing_consistent" transform="translate(0.000000, 0.551724)">
+            <path d="M56.712234,1 C59.1975153,1 61.4475153,2.00735931 63.076195,3.63603897 C64.7048747,5.26471863 65.712234,7.51471863 65.712234,10 L65.712234,10 L65.712234,65 L10.712234,65 C8.22695259,65 5.97695259,63.9926407 4.34827294,62.363961 C2.71959328,60.7352814 1.71223397,58.4852814 1.71223397,56 L1.71223397,56 L1.71223397,10 C1.71223397,7.51471863 2.71959328,5.26471863 4.34827294,3.63603897 C5.97695259,2.00735931 8.22695259,1 10.712234,1 L10.712234,1 Z" id="rectangle" stroke="#000000" stroke-width="2" fill="#FFFFFF"></path>
+            <path d="M40.6120805,47.037834 C37.4692348,47.037834 35.0126139,45.9348613 33.712234,44.0140597 C32.4118541,45.9348613 29.9552331,47.037834 26.8123883,47.037834 C22.6574397,47.037834 16.0646712,43.4437723 16.0646712,33.8021619 C16.0646712,29.3401361 17.4715879,18.962166 30.5035862,18.962166 C30.9454018,18.962166 31.3057481,19.3225124 31.3057481,19.7643279 L31.3057481,21.3686518 C31.3057481,21.8104674 30.9454018,22.1708138 30.5035862,22.1708138 C26.6400486,22.1708138 22.4819668,25.8118774 22.4819668,33.8021619 C22.4819668,37.5090277 23.7635456,43.0270243 27.2949384,43.0270243 C29.795428,43.0270243 31.224279,40.4231312 32.0985095,38.2861221 C30.5067194,35.6101596 29.7014243,33.1034035 29.7014243,30.8347892 C29.7014243,25.6238707 31.8603677,23.7751377 33.712234,23.7751377 C35.5641002,23.7751377 37.7230437,25.6238707 37.7230437,30.8347892 C37.7230437,33.1347383 36.9396828,35.5788255 35.3290916,38.2861221 C36.6294715,41.4321009 38.243196,43.0270243 40.1295295,43.0270243 C43.6609223,43.0270243 44.9425012,37.5090277 44.9425012,33.8021619 C44.9425012,25.8118774 40.7844193,22.1708138 36.9208817,22.1708138 C36.4759329,22.1708138 36.1187198,21.8104674 36.1187198,21.3686518 L36.1187198,19.7643279 C36.1187198,19.3225124 36.4759329,18.962166 36.9208817,18.962166 C49.9528801,18.962166 51.3597967,29.3401361 51.3597967,33.8021619 C51.3597967,43.4437723 44.7670282,47.037834 40.6120805,47.037834 Z" id="omega" fill="#000000" fill-rule="nonzero"></path>
+        </g>
+    </g>
 </svg>""",
     "col_vals_regex": """<?xml version="1.0" encoding="UTF-8"?>
 <svg width="67px" height="67px" viewBox="0 0 67 67" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">

From 61718391967fef0782080df4a0de6e5badb785d2 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:22 -0400
Subject: [PATCH 28/55] Update _interrogation.py

---
 pointblank/_interrogation.py | 76 ++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/pointblank/_interrogation.py b/pointblank/_interrogation.py
index 2e1e8e8af..218a705e1 100644
--- a/pointblank/_interrogation.py
+++ b/pointblank/_interrogation.py
@@ -2610,6 +2610,82 @@ def apply_missing_exclusion(results_tbl: IntoFrame, column: str, spec: Any) -> A
     return nw_tbl.to_native()
 
 
+def interrogate_missing_only_coded(
+    tbl: IntoFrame,
+    column: str,
+    sentinels: list,
+    count_null: bool,
+    allowed: list | None,
+    min_val: Any,
+    max_val: Any,
+) -> Any:
+    """Missing-only-coded interrogation.
+
+    A row passes when its value is either a declared sentinel (a documented missing code), a null
+    (when `count_null=True`), or a legitimate "real" value — one in `allowed` or within the
+    `[min_val, max_val]` range. Any other value is treated as an *undocumented* code and fails.
+    """
+    nw_tbl = nw.from_native(tbl)
+
+    good = None  # running OR of every "acceptable value" predicate; None until one is added
+
+    def _or(expr):  # fold `expr` into the enclosing `good` accumulator
+        nonlocal good
+        good = expr if good is None else (good | expr)
+
+    if sentinels:  # an empty or None sentinel list contributes no predicate
+        _or(nw.col(column).is_in(sentinels).fill_null(False))
+    if count_null:
+        _or(nw.col(column).is_null())
+    if allowed:  # likewise skipped when empty or None
+        _or(nw.col(column).is_in(allowed).fill_null(False))
+    if min_val is not None or max_val is not None:
+        range_expr = nw.lit(True)  # neutral seed so either bound may be omitted
+        if min_val is not None:
+            range_expr = range_expr & (nw.col(column) >= min_val)
+        if max_val is not None:
+            range_expr = range_expr & (nw.col(column) <= max_val)
+        _or(range_expr.fill_null(False))  # a null comparison result must not count as in-range
+
+    if good is None:
+        good = nw.lit(False)  # no predicate was enabled, so every row is flagged as failing
+
+    result_tbl = nw_tbl.with_columns(pb_is_good_=good)
+    return result_tbl.to_native()
+
+
+def interrogate_missing_consistent(
+    tbl: IntoFrame, columns: list[str], sentinels: list, count_null: bool
+) -> Any:
+    """Cross-column missing-consistency interrogation.
+
+    Given a set of related `columns`, a row passes when the "missing for a given reason" status is
+    consistent across all of them: either *none* of the columns carry the reason, or *all* of them
+    do. A row fails when some-but-not-all of the columns are missing for that reason. Missingness
+    for the reason is encoded by the `sentinels` values (and, when `count_null=True`, actual nulls).
+    """
+    nw_tbl = nw.from_native(tbl)
+    n_cols = len(columns)
+
+    count_expr = None  # NOTE(review): stays None if `columns` is empty; confirm callers guard
+    for c in columns:
+        if sentinels:
+            col_expr = nw.col(c).is_in(sentinels).fill_null(False)
+        else:  # no sentinel codes: only nulls (when counted) can mark the reason
+            col_expr = nw.lit(False)  # noqa
+        if count_null:
+            col_expr = col_expr | nw.col(c).is_null()
+        col_count = col_expr.cast(nw.Int32)  # bool -> integer so per-column flags can be summed
+        count_expr = col_count if count_expr is None else (count_expr + col_count)
+
+    result_tbl = nw_tbl.with_columns(_n_reason_=count_expr)  # temporary per-row tally column
+    result_tbl = result_tbl.with_columns(
+        pb_is_good_=((nw.col("_n_reason_") == 0) | (nw.col("_n_reason_") == n_cols))
+    )
+    result_tbl = result_tbl.drop("_n_reason_")  # helper column is not part of the result
+    return result_tbl.to_native()
+
+
 def interrogate_missing_coded(tbl: IntoFrame, column: str) -> Any:
     """Missing-coded interrogation.
 

From 8c36fc661ea3d2304191ee7ec917028ef41e1873 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:27 -0400
Subject: [PATCH 29/55] Update validate.py

---
 pointblank/validate.py | 585 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 550 insertions(+), 35 deletions(-)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index 78bdeeced..d3a0b0120 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -78,6 +78,8 @@
     interrogate_lt,
     interrogate_ne,
     interrogate_missing_coded,
+    interrogate_missing_consistent,
+    interrogate_missing_only_coded,
     interrogate_not_null,
     interrogate_notin,
     interrogate_null,
@@ -2684,9 +2686,15 @@ def _prettify_reason_label(reason: str) -> str:
     return reason.replace("_", " ").title()
 
 
-def _build_structured_missing_tbl(data: Any, missing: dict[str, MissingSpec]) -> GT:
+def _build_structured_missing_tbl(
+    data: Any, missing: dict[str, MissingSpec], as_heatmap: bool = False
+) -> GT:
     """Build a structured-missingness breakdown table (one row per column, columns for the count
-    and percentage of complete values and of each missing reason)."""
+    and percentage of complete values and of each missing reason).
+
+    When `as_heatmap=True`, render the reason proportions as a color-coded heatmap (cells shaded
+    from light to dark by the proportion missing for each reason) instead of count/percent text.
+    """
     if not isinstance(missing, dict):
         raise TypeError(
             f"`missing=` must be a dict mapping column names to MissingSpec objects, "
@@ -2745,18 +2753,32 @@ def _build_structured_missing_tbl(data: Any, missing: dict[str, MissingSpec]) ->
         total_missing = sum(counts.values())
         complete = total - total_missing
 
-        def _fmt(count: int) -> str:
-            pct = round(100 * count / total) if total > 0 else 0
-            return f"{count} ({pct}%)"
+        def _prop(count: int) -> float:
+            return (count / total) if total > 0 else 0.0
 
-        record: dict[str, Any] = {
-            "columns": column,
-            "total_n": str(total),
-            "complete": _fmt(complete),
-        }
-        # Fill every reason column in the union (0 for reasons this spec doesn't define)
-        for r in reason_order:
-            record[r] = _fmt(counts.get(r, 0))
+        if as_heatmap:
+            # Numeric proportions (0..1) so cells can be color-shaded by missingness
+            record: dict[str, Any] = {
+                "columns": column,
+                "total_n": str(total),
+                "complete": _prop(complete),
+            }
+            for r in reason_order:
+                record[r] = _prop(counts.get(r, 0))
+        else:
+
+            def _fmt(count: int) -> str:
+                pct = round(100 * count / total) if total > 0 else 0
+                return f"{count} ({pct}%)"
+
+            record = {
+                "columns": column,
+                "total_n": str(total),
+                "complete": _fmt(complete),
+            }
+            # Fill every reason column in the union (0 for reasons this spec doesn't define)
+            for r in reason_order:
+                record[r] = _fmt(counts.get(r, 0))
         records.append(record)
 
     # Build a DataFrame from the records using the available DataFrame library
@@ -2770,9 +2792,6 @@ def _fmt(count: int) -> str:
 
         breakdown_df = pd.DataFrame(records)
 
-    title = "Missing Values by Reason"
-    subtitle = "Counts and percentages of complete values and each missing reason, per column."
-
     cols_labels = {
         "columns": "Column",
         "total_n": "Total N",
@@ -2783,23 +2802,55 @@ def _fmt(count: int) -> str:
 
     value_columns = ["total_n", "complete"] + reason_order
 
-    gt_tbl = (
-        GT(breakdown_df)
-        .tab_header(title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle)
-        .opt_table_font(font=google_font(name="IBM Plex Sans"))
-        .opt_align_table_header(align="left")
-        .cols_label(cases=cols_labels)
-        .cols_align(align="right", columns=value_columns)
-        .cols_align(align="left", columns="columns")
-        .tab_style(
-            style=style.text(font=google_font(name="IBM Plex Mono"), size="12px"),
-            locations=loc.body(columns=value_columns),
+    if as_heatmap:
+        title = "Missing Pattern Heatmap"
+        subtitle = "Proportion of each missing reason per column (darker = more missing)."
+        prop_columns = ["complete"] + reason_order
+
+        gt_tbl = (
+            GT(breakdown_df)
+            .tab_header(
+                title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle
+            )
+            .opt_table_font(font=google_font(name="IBM Plex Sans"))
+            .opt_align_table_header(align="left")
+            .cols_label(cases=cols_labels)
+            .cols_align(align="center", columns=value_columns)
+            .cols_align(align="left", columns="columns")
+            .fmt_percent(columns=prop_columns, decimals=0)
+            .data_color(
+                columns=reason_order,
+                palette=["#F5F5F5", "#000000"],
+                domain=[0, 1],
+            )
+            .tab_style(
+                style=style.text(weight="bold"),
+                locations=loc.body(columns="columns"),
+            )
         )
-        .tab_style(
-            style=style.text(weight="bold"),
-            locations=loc.body(columns="columns"),
+    else:
+        title = "Missing Values by Reason"
+        subtitle = "Counts and percentages of complete values and each missing reason, per column."
+
+        gt_tbl = (
+            GT(breakdown_df)
+            .tab_header(
+                title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle
+            )
+            .opt_table_font(font=google_font(name="IBM Plex Sans"))
+            .opt_align_table_header(align="left")
+            .cols_label(cases=cols_labels)
+            .cols_align(align="right", columns=value_columns)
+            .cols_align(align="left", columns="columns")
+            .tab_style(
+                style=style.text(font=google_font(name="IBM Plex Mono"), size="12px"),
+                locations=loc.body(columns=value_columns),
+            )
+            .tab_style(
+                style=style.text(weight="bold"),
+                locations=loc.body(columns="columns"),
+            )
         )
-    )
 
     if version("great_tables") >= "0.17.0":
         gt_tbl = gt_tbl.tab_options(quarto_disable_processing=True)
@@ -2807,7 +2858,9 @@ def _fmt(count: int) -> str:
     return gt_tbl
 
 
-def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -> GT:
+def missing_vals_tbl(
+    data: Any, missing: dict[str, MissingSpec] | None = None, as_heatmap: bool = False
+) -> GT:
     """
     Display a table that shows the missing values in the input table.
 
@@ -2832,6 +2885,10 @@ def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -
         An optional dictionary mapping column names to [`MissingSpec`](`pointblank.MissingSpec`)
         objects. When provided, the function renders a structured breakdown of missingness by
         reason for the specified columns (rather than the default sector heatmap).
+    as_heatmap
+        Only applies when `missing=` is provided. When `True`, render the per-reason proportions as
+        a color-coded heatmap (cells shaded from light to dark by the proportion missing) instead of
+        the count/percentage text breakdown. Default is `False`.
 
     Returns
     -------
@@ -2913,7 +2970,7 @@ def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -
     # (count and percentage of complete values and each missing reason, per column) instead of
     # the default sector heatmap
     if missing is not None:
-        return _build_structured_missing_tbl(data=data, missing=missing)
+        return _build_structured_missing_tbl(data=data, missing=missing, as_heatmap=as_heatmap)
 
     # Get the number of rows in the table
     n_rows = get_row_count(data)
@@ -10894,6 +10951,168 @@ def col_missing_coded(
 
         return self
 
+    def col_missing_only_coded(
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        allowed: Collection[Any] | None = None,
+        min_val: float | int | None = None,
+        max_val: float | int | None = None,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate:
+        """
+        Validate that a column contains only documented codes and legitimate values.
+
+        The `col_missing_only_coded()` method checks that every value in a column is *accounted
+        for*: it is either a declared missing-value code (a sentinel in the
+        [`MissingSpec`](`pointblank.MissingSpec`), or a null when `null_is_missing=True`), or a
+        legitimate "real" value. Legitimate real values are defined by `allowed=` (an explicit set)
+        and/or a `[min_val, max_val]` range. Any value that is neither a documented code nor a
+        legitimate real value is flagged — this catches *undocumented* sentinel codes (e.g., a
+        stray `-95`) that aren't part of the spec.
+
+        At least one of `allowed=`, `min_val=`, or `max_val=` must be provided so that legitimate
+        real values can be distinguished from undocumented codes. This validation operates over the
+        number of test units equal to the number of rows in the table.
+
+        Parameters
+        ----------
+        columns
+            A single column or a list of columns to validate. Can also use
+            [`col()`](`pointblank.col`) with column selectors to specify one or more columns.
+        missing
+            A [`MissingSpec`](`pointblank.MissingSpec`) declaring the documented sentinel codes.
+        allowed
+            An explicit set of legitimate real values. A value in this set passes. Can be combined
+            with `min_val=`/`max_val=` (a value passes if it satisfies either constraint).
+        min_val
+            Lower bound (inclusive) of the legitimate real-value range.
+        max_val
+            Upper bound (inclusive) of the legitimate real-value range.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+        segments
+            An optional directive on segmentation, which serves to split a validation step into
+            multiple (one step per segment).
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use the templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value or callable that determines whether the validation step should be
+            active. Using `False` will make the validation step inactive (still reporting its
+            presence and keeping indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer_timings=False, preview_incl_header=False)
+        ```
+        The `age` column should contain real ages in `[0, 120]` or the documented codes `-99`/`-98`.
+        The value `-95` is an *undocumented* code and should be flagged:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame({"age": [34, -98, 41, -95, 29, -99, 55]})
+
+        age_missing = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=age_missing, min_val=0, max_val=120)
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        The validation reports one failing test unit: the row where `age` is `-95`, which is
+        neither a real age in range nor a declared sentinel.
+        """
+        assertion_type = _get_fn_name()
+
+        _check_column(column=columns)
+        _check_pre(pre=pre)
+        _check_thresholds(thresholds=thresholds)
+        _check_active_input(param=active, param_name="active")
+
+        if not isinstance(missing, MissingSpec):
+            raise TypeError(
+                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
+            )
+
+        if allowed is None and min_val is None and max_val is None:
+            raise ValueError(
+                "`col_missing_only_coded()` requires at least one of `allowed=`, `min_val=`, or "
+                "`max_val=` so that legitimate real values can be distinguished from undocumented "
+                "codes."
+            )
+
+        sentinels = missing.sentinel_values()
+        count_null = missing.null_is_missing
+        allowed_list = list(allowed) if allowed is not None else None
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        columns = _resolve_columns(columns)
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        # Iterate over the columns and create a validation step for each
+        for column in columns:
+            val_info = _ValidationInfo(
+                assertion_type=assertion_type,
+                column=column,
+                values={
+                    "sentinels": sentinels,
+                    "count_null": count_null,
+                    "allowed": allowed_list,
+                    "min_val": min_val,
+                    "max_val": max_val,
+                    "spec": missing,
+                },
+                pre=pre,
+                segments=segments,
+                thresholds=thresholds,
+                actions=actions,
+                brief=brief,
+                active=active,
+            )
+
+            self._add_validation(validation_info=val_info)
+
+        return self
+
     def rows_distinct(
         self,
         columns_subset: str | list[str] | None = None,
@@ -11386,6 +11605,167 @@ def rows_complete(
 
         return self
 
+    def col_missing_consistent(
+        self,
+        columns: list[str],
+        missing: MissingSpec,
+        when_reason: str,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate:
+        """
+        Validate that related columns share a consistent missingness pattern for a given reason.
+
+        The `col_missing_consistent()` method checks that, across a set of related columns, the
+        "missing for a specific reason" status is *consistent*: for each row, either *none* of the
+        columns are missing for `when_reason=`, or *all* of them are. This is useful for structured
+        survey or clinical data where a skip pattern should propagate across related fields — for
+        example, if a question wasn't asked (`"not_asked"`) then all of its dependent fields should
+        also be coded `"not_asked"`.
+
+        A value is considered "missing for the reason" when it is one of the sentinel values mapped
+        to `when_reason=` in the [`MissingSpec`](`pointblank.MissingSpec`) (and, when the reason is
+        the spec's `null_reason` and `null_is_missing=True`, an actual null). This validation
+        operates over the number of test units equal to the number of rows in the table. A row fails
+        when some — but not all — of the columns are missing for the given reason.
+
+        Parameters
+        ----------
+        columns
+            A list of related columns to check for consistent missingness.
+        missing
+            A [`MissingSpec`](`pointblank.MissingSpec`) describing the sentinel values and their
+            reasons for the columns.
+        when_reason
+            The reason label whose presence should be consistent across `columns=`. If one column
+            in a row is missing for this reason, all of them should be.
+        pre
+            An optional preprocessing function or lambda to apply to the data table during
+            interrogation. This function should take a table as input and return a modified table.
+        segments
+            An optional directive on segmentation, which serves to split a validation step into
+            multiple (one step per segment).
+        thresholds
+            Set threshold failure levels for reporting and reacting to exceedances of the levels.
+            The thresholds are set at the step level and will override any global thresholds set in
+            `Validate(thresholds=...)`.
+        actions
+            Optional actions to take when the validation step meets or exceeds any set threshold
+            levels. If provided, the [`Actions`](`pointblank.Actions`) class should be used to
+            define the actions.
+        brief
+            An optional brief description of the validation step that will be displayed in the
+            reporting table. You can use the templating elements like `"{step}"` to insert
+            the step number, or `"{auto}"` to include an automatically generated brief. If `True`
+            the entire brief will be automatically generated. If `None` (the default) then there
+            won't be a brief.
+        active
+            A boolean value or callable that determines whether the validation step should be
+            active. Using `False` will make the validation step inactive (still reporting its
+            presence and keeping indexes for the steps unchanged).
+
+        Returns
+        -------
+        Validate
+            The `Validate` object with the added validation step.
+
+        Examples
+        --------
+        ```{python}
+        #| echo: false
+        #| output: false
+        import pointblank as pb
+        pb.config(report_incl_header=False, report_incl_footer_timings=False, preview_incl_header=False)
+        ```
+        Here, `income_source` and `income_amount` should both be coded `"not_asked"` (`-99`) together
+        when the income question wasn't asked. The last row is inconsistent — only one field is
+        coded `-99`:
+
+        ```{python}
+        import pointblank as pb
+        import polars as pl
+
+        tbl = pl.DataFrame(
+            {
+                "income_source": [1, -99, 2, -99],
+                "income_amount": [50000, -99, 42000, 38000],
+            }
+        )
+
+        income_missing = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+        validation = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(
+                columns=["income_source", "income_amount"],
+                missing=income_missing,
+                when_reason="not_asked",
+            )
+            .interrogate()
+        )
+
+        validation
+        ```
+
+        The validation reports one failing test unit: the final row, where `income_source` is coded
+        `-99` (`"not_asked"`) but `income_amount` is a real value.
+        """
+        assertion_type = _get_fn_name()
+
+        _check_pre(pre=pre)
+        _check_thresholds(thresholds=thresholds)
+        _check_active_input(param=active, param_name="active")
+
+        if not isinstance(missing, MissingSpec):
+            raise TypeError(
+                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
+            )
+
+        if isinstance(columns, str):
+            columns = [columns]
+        columns = list(columns)
+        if len(columns) < 2:
+            raise ValueError(
+                "`col_missing_consistent()` requires at least two columns to compare."
+            )
+
+        # Resolve which sentinel values (and whether nulls) represent `when_reason`
+        sentinels = missing.values_for_reason(when_reason)
+        count_null = missing.null_is_missing and missing.null_reason == when_reason
+
+        # Determine threshold to use (global or local) and normalize a local `thresholds=` value
+        thresholds = (
+            self.thresholds if thresholds is None else _normalize_thresholds_creation(thresholds)
+        )
+
+        # Determine brief to use (global or local) and transform any shorthands of `brief=`
+        brief = self.brief if brief is None else _transform_auto_brief(brief=brief)
+
+        val_info = _ValidationInfo(
+            assertion_type=assertion_type,
+            column=columns,
+            values={
+                "sentinels": sentinels,
+                "count_null": count_null,
+                "when_reason": when_reason,
+                "spec": missing,
+            },
+            pre=pre,
+            segments=segments,
+            thresholds=thresholds,
+            actions=actions,
+            brief=brief,
+            active=active,
+        )
+
+        self._add_validation(validation_info=val_info)
+
+        return self
+
     def prompt(
         self,
         prompt: str,
@@ -14260,6 +14640,7 @@ def interrogate(
                         "col_vals_null",
                         "col_vals_not_null",
                         "col_missing_coded",
+                        "col_missing_only_coded",
                         "col_vals_increasing",
                         "col_vals_decreasing",
                         "col_vals_between",
@@ -14304,6 +14685,16 @@ def interrogate(
                             results_tbl = interrogate_not_null(tbl=tbl, column=column)
                         elif assertion_method == "missing_coded":
                             results_tbl = interrogate_missing_coded(tbl=tbl, column=column)
+                        elif assertion_method == "missing_only_coded":
+                            results_tbl = interrogate_missing_only_coded(
+                                tbl=tbl,
+                                column=column,
+                                sentinels=value["sentinels"],
+                                count_null=value["count_null"],
+                                allowed=value["allowed"],
+                                min_val=value["min_val"],
+                                max_val=value["max_val"],
+                            )
 
                         elif assertion_type == "col_vals_increasing":
                             from pointblank._interrogation import interrogate_increasing
@@ -14427,6 +14818,14 @@ def interrogate(
                     elif assertion_type == "rows_complete":
                         results_tbl = rows_complete(data_tbl=data_tbl_step, columns_subset=column)
 
+                    elif assertion_type == "col_missing_consistent":
+                        results_tbl = interrogate_missing_consistent(
+                            tbl=data_tbl_step,
+                            columns=column,
+                            sentinels=value["sentinels"],
+                            count_null=value["count_null"],
+                        )
+
                     elif assertion_type == "prompt":
                         from pointblank._interrogation import interrogate_prompt
 
@@ -15048,7 +15447,8 @@ def interrogate(
             if (
                 collect_extracts
                 and assertion_type
-                in ROW_BASED_VALIDATION_TYPES + ["rows_distinct", "rows_complete"]
+                in ROW_BASED_VALIDATION_TYPES
+                + ["rows_distinct", "rows_complete", "col_missing_consistent"]
                 and tbl_type not in IBIS_BACKENDS
             ):
                 # Add row numbers to the results table
@@ -17480,6 +17880,20 @@ def get_tabular_report(
             ]:
                 values_upd.append("&mdash;")
 
+            elif assertion_type[i] in ["col_missing_consistent"]:
+                # Show the reason being checked for cross-column consistency
+                values_upd.append(f"when_reason = {value.get('when_reason')}")
+
+            elif assertion_type[i] in ["col_missing_only_coded"]:
+                # Show the allowed real values and/or range used to define legitimate values
+                parts = []
+                if value.get("allowed") is not None:
+                    allowed_str = str(value["allowed"])[1:-1].replace("'", "")
+                    parts.append(f"allowed = {allowed_str}")
+                if value.get("min_val") is not None or value.get("max_val") is not None:
+                    parts.append(f"[{value.get('min_val')}, {value.get('max_val')}]")
+                values_upd.append("<br/>".join(parts) if parts else "&mdash;")
+
             elif assertion_type[i] in ["col_pct_null"]:
                 # Extract p and tol from the values dict for nice formatting
                 p_value = value["p"]
@@ -17633,6 +18047,20 @@ def get_tabular_report(
             else:  # pragma: no cover
                 values_upd.append(str(value))  # pragma: no cover
 
+        # Annotate `col_vals_*` steps that carry a `missing=` MissingSpec so the report shows that
+        # structured-missing values (sentinels and, optionally, nulls) were excluded from the check.
+        # The `missing` spec is fetched directly from the validation steps (it isn't a report field).
+        missing_specs = [getattr(v, "missing", None) for v in self.validation_info]
+        for i, spec in enumerate(missing_specs):
+            if spec is None or i >= len(values_upd):
+                continue
+            reasons = ", ".join(spec.reasons_list()) if hasattr(spec, "reasons_list") else ""
+            annotation = (
+                "<br/><span style='font-size: 9px; color: #999999;'>"
+                f"missing-aware: {reasons}</span>"
+            )
+            values_upd[i] = f"{values_upd[i]}{annotation}"
+
         # Remove the `inclusive` entry from the dictionary
         validation_info_dict.pop("inclusive")
 
@@ -18357,7 +18785,7 @@ def get_step_report(
         # if get_row_count(extract) == 0:
         #    return "No rows were extracted."
 
-        if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete"]:
+        if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete", "col_missing_consistent"]:
             # Get the extracted data for the step
             extract = self.get_data_extracts(i=i, frame=True)
 
@@ -18439,6 +18867,20 @@ def get_step_report(
         else:
             step_report = None  # pragma: no cover
 
+        # If the step is associated with a MissingSpec, append a legend of the missing-value codes
+        # and their reasons so that sentinel values appearing in the failing rows can be interpreted
+        step_spec = getattr(self.validation_info[i - 1], "missing", None)
+        if step_spec is None and isinstance(values, MissingSpec):
+            # col_missing_coded stores the spec directly in `values`
+            step_spec = values
+        if step_spec is None and isinstance(values, dict) and isinstance(values.get("spec"), MissingSpec):
+            # col_missing_only_coded and col_missing_consistent stash the spec under `values["spec"]`
+            step_spec = values["spec"]
+        if step_spec is not None and step_report is not None:
+            legend_html = _missing_legend_html(step_spec)
+            if legend_html and hasattr(step_report, "tab_source_note"):
+                step_report = step_report.tab_source_note(source_note=html(legend_html))
+
         return step_report
 
     def get_dataframe_report(
@@ -19828,6 +20270,21 @@ def _create_autobrief_or_failure_text(
             for_failure=for_failure,
         )
 
+    if assertion_type == "col_missing_only_coded":
+        return _create_text_col_missing_only_coded(
+            lang=lang,
+            column=column,
+            for_failure=for_failure,
+        )
+
+    if assertion_type == "col_missing_consistent":
+        return _create_text_col_missing_consistent(
+            lang=lang,
+            columns=column,
+            value=values,
+            for_failure=for_failure,
+        )
+
     if assertion_type == "conjointly":
         return _create_text_conjointly(lang=lang, for_failure=for_failure)
 
@@ -20271,6 +20728,38 @@ def _create_text_col_missing_coded(lang: str, column: str | None, for_failure: b
     )
 
 
+def _create_text_col_missing_only_coded(
+    lang: str, column: str | None, for_failure: bool = False
+) -> str:
+    """Create autobrief/failure text for col_missing_only_coded validation."""
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    column_text = _prep_column_text(column=column)
+
+    return EXPECT_FAIL_TEXT[f"col_missing_only_coded_{type_}_text"][lang].format(
+        column_text=column_text,
+    )
+
+
+def _create_text_col_missing_consistent(
+    lang: str, columns: Any, value: dict, for_failure: bool = False
+) -> str:
+    """Create autobrief/failure text for col_missing_consistent validation."""
+    type_ = _expect_failure_type(for_failure=for_failure)
+
+    if isinstance(columns, (list, tuple)):
+        columns_text = _prep_values_text(values=list(columns), lang=lang, limit=5)
+    else:
+        columns_text = _prep_column_text(column=columns)
+
+    reason = value.get("when_reason") if isinstance(value, dict) else None
+
+    return EXPECT_FAIL_TEXT[f"col_missing_consistent_{type_}_text"][lang].format(
+        columns_text=columns_text,
+        reason=reason,
+    )
+
+
 def _create_text_conjointly(lang: str, for_failure: bool = False) -> str:
     type_ = _expect_failure_type(for_failure=for_failure)
 
@@ -20618,6 +21107,21 @@ def _apply_segments(data_tbl: Any, segments_expr: tuple[str, str]) -> Any:
     return data_tbl
 
 
+def _missing_legend_html(spec: Any) -> str:
+    """Build an HTML legend of a MissingSpec's sentinel codes and their reasons, for step reports."""
+    if not hasattr(spec, "reasons"):
+        return ""
+    items = [f"<code>{value}</code> &rarr; {reason}" for value, reason in spec.reasons.items()]
+    if getattr(spec, "null_is_missing", False):
+        items.append(f"<code>null</code> &rarr; {spec.null_reason}")
+    if not items:
+        return ""
+    return (
+        "<div style='font-size: 10px; color: #555555; padding-top: 4px;'>"
+        "<strong>Missing codes:</strong> " + "; ".join(items) + "</div>"
+    )
+
+
 def _validation_info_as_dict(validation_info: _ValidationInfo) -> dict:
     """
     Convert a `_ValidationInfo` object to a dictionary.
@@ -22344,6 +22848,17 @@ def _step_report_row_based(
             text = STEP_REPORT_TEXT["rows_complete_all"][lang]
         else:
             text = STEP_REPORT_TEXT["rows_complete_subset"][lang]
+    elif assertion_type == "col_missing_coded":
+        text = f"{column} is missing-coded"
+    elif assertion_type == "col_missing_only_coded":
+        text = f"{column} only documented codes"
+    elif assertion_type == "col_missing_consistent":
+        cols = ", ".join(column) if isinstance(column, (list, tuple)) else str(column)
+        reason = values.get("when_reason") if isinstance(values, dict) else None
+        text = f"consistent &ldquo;{reason}&rdquo; across {{{cols}}}"
+    else:
+        # Fallback for any other assertion type: show the assertion type name
+        text = str(assertion_type)
 
     # Wrap assertion text in a <code> tag
     text = (

From 0b2c73a1452a90a9e1c1a0e1c724d5aa113de44f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:32 -0400
Subject: [PATCH 30/55] Update validate.pyi

---
 pointblank/validate.pyi | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/pointblank/validate.pyi b/pointblank/validate.pyi
index 359686cf2..25e1714b9 100644
--- a/pointblank/validate.pyi
+++ b/pointblank/validate.pyi
@@ -78,7 +78,9 @@ def preview(
     min_tbl_width: int = 500,
     incl_header: bool | None = None,
 ) -> GT: ...
-def missing_vals_tbl(data: Any, missing: dict[str, MissingSpec] | None = None) -> GT: ...
+def missing_vals_tbl(
+    data: Any, missing: dict[str, MissingSpec] | None = None, as_heatmap: bool = False
+) -> GT: ...
 def get_column_count(data: Any) -> int: ...
 def get_row_count(data: Any) -> int: ...
 @dataclass
@@ -432,6 +434,20 @@ class Validate:
         brief: str | bool | None = None,
         active: bool | Callable = True,
     ) -> Validate: ...
+    def col_missing_only_coded(
+        self,
+        columns: str | list[str] | Column | ColumnSelector | ColumnSelectorNarwhals,
+        missing: MissingSpec,
+        allowed: Collection[Any] | None = None,
+        min_val: float | int | None = None,
+        max_val: float | int | None = None,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate: ...
     def rows_distinct(
         self,
         columns_subset: str | list[str] | None = None,
@@ -452,6 +468,18 @@ class Validate:
         brief: str | bool | None = None,
         active: bool | Callable = True,
     ) -> Validate: ...
+    def col_missing_consistent(
+        self,
+        columns: list[str],
+        missing: MissingSpec,
+        when_reason: str,
+        pre: Callable | None = None,
+        segments: SegmentSpec | None = None,
+        thresholds: int | float | bool | tuple | dict | Thresholds | None = None,
+        actions: Actions | None = None,
+        brief: str | bool | None = None,
+        active: bool | Callable = True,
+    ) -> Validate: ...
     def prompt(
         self,
         prompt: str,

From 2832838b2a84963cdbe07c6c9ec35c5b05f432d2 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:38 -0400
Subject: [PATCH 31/55] Update yaml.py

---
 pointblank/yaml.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pointblank/yaml.py b/pointblank/yaml.py
index 455d6744f..6a9e39c26 100644
--- a/pointblank/yaml.py
+++ b/pointblank/yaml.py
@@ -275,6 +275,8 @@ class YAMLValidator:
         "col_pct_null": "col_pct_null",
         "col_pct_missing": "col_pct_missing",
         "col_missing_coded": "col_missing_coded",
+        "col_missing_only_coded": "col_missing_only_coded",
+        "col_missing_consistent": "col_missing_consistent",
         "rows_distinct": "rows_distinct",
         "rows_complete": "rows_complete",
         "col_count_match": "col_count_match",

From 9488596a12c3cdb917208a8ebea6aeb156d9db6f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:43 -0400
Subject: [PATCH 32/55] Create test_col_missing_consistent.py

---
 tests/test_col_missing_consistent.py | 114 +++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 tests/test_col_missing_consistent.py

diff --git a/tests/test_col_missing_consistent.py b/tests/test_col_missing_consistent.py
new file mode 100644
index 000000000..50c424ba4
--- /dev/null
+++ b/tests/test_col_missing_consistent.py
@@ -0,0 +1,114 @@
+import polars as pl
+import pandas as pd
+import pytest
+
+import pointblank as pb
+
+
+@pytest.fixture
+def spec():
+    return pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+
+def _info(v):
+    return v.validation_info[0]
+
+
+class TestColMissingConsistent:
+    def test_basic_inconsistency(self, spec):
+        tbl = pl.DataFrame(
+            {"income_source": [1, -99, 2, -99], "income_amount": [50000, -99, 42000, 38000]}
+        )
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(
+                columns=["income_source", "income_amount"], missing=spec, when_reason="not_asked"
+            )
+            .interrogate()
+        )
+        info = _info(v)
+        assert info.n == 4
+        assert info.n_failed == 1  # last row: only one column is -99
+
+    def test_all_consistent_passes(self, spec):
+        tbl = pl.DataFrame(
+            {"a": [1, -99, 2, -99], "b": [5, -99, 6, -99]}
+        )
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
+            .interrogate()
+        )
+        assert _info(v).n_failed == 0
+
+    def test_null_reason_consistency(self):
+        # when_reason == null_reason, null_is_missing True -> nulls count
+        spec = pb.MissingSpec(reasons={-98: "refused"}, null_reason="unknown")
+        tbl = pl.DataFrame({"a": [1, None, None], "b": [5, None, 6]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="unknown")
+            .interrogate()
+        )
+        # row2 both null -> ok; row3 only a null -> fail
+        assert _info(v).n_failed == 1
+
+    def test_three_columns(self, spec):
+        tbl = pl.DataFrame(
+            {"a": [-99, 1, -99], "b": [-99, 2, -99], "c": [-99, 3, 7]}
+        )
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(columns=["a", "b", "c"], missing=spec, when_reason="not_asked")
+            .interrogate()
+        )
+        # row1 all -99 ok; row2 none ok; row3 a,b -99 but c=7 -> fail
+        assert _info(v).n_failed == 1
+
+    def test_requires_two_columns(self, spec):
+        tbl = pl.DataFrame({"a": [1, 2]})
+        with pytest.raises(ValueError, match="at least two columns"):
+            pb.Validate(data=tbl).col_missing_consistent(
+                columns=["a"], missing=spec, when_reason="not_asked"
+            )
+
+    def test_missing_must_be_spec(self):
+        tbl = pl.DataFrame({"a": [1], "b": [2]})
+        with pytest.raises(TypeError):
+            pb.Validate(data=tbl).col_missing_consistent(
+                columns=["a", "b"], missing={-99: "x"}, when_reason="not_asked"
+            )
+
+    def test_pandas_backend(self, spec):
+        tbl = pd.DataFrame(
+            {"a": [1, -99, -99], "b": [5, -99, 6]}
+        )
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
+            .interrogate()
+        )
+        assert _info(v).n_failed == 1
+
+    def test_report_and_step_report(self, spec):
+        tbl = pl.DataFrame({"a": [1, -99, -99], "b": [5, -99, 6]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
+            .interrogate()
+        )
+        assert v.get_tabular_report() is not None
+        # step report (row-based extract path) should build without error
+        assert v.get_step_report(i=1) is not None
+
+    @pytest.mark.parametrize("lang", ["en", "fr", "de", "ja", "ar", "zh-Hans"])
+    def test_brief_langs(self, spec, lang):
+        tbl = pl.DataFrame({"a": [1, -99, -99], "b": [5, -99, 6]})
+        v = (
+            pb.Validate(data=tbl, lang=lang)
+            .col_missing_consistent(
+                columns=["a", "b"], missing=spec, when_reason="not_asked", brief=True
+            )
+            .interrogate()
+        )
+        assert _info(v).autobrief

From a21533edb0e24b40717058a4bbd0dbac0bb62b31 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:48 -0400
Subject: [PATCH 33/55] Create test_col_missing_only_coded.py

---
 tests/test_col_missing_only_coded.py | 115 +++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 tests/test_col_missing_only_coded.py

diff --git a/tests/test_col_missing_only_coded.py b/tests/test_col_missing_only_coded.py
new file mode 100644
index 000000000..7bb1bbbbb
--- /dev/null
+++ b/tests/test_col_missing_only_coded.py
@@ -0,0 +1,115 @@
+import polars as pl
+import pandas as pd
+import pytest
+
+import pointblank as pb
+
+
+@pytest.fixture
+def spec():
+    return pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+
+def _info(v):
+    return v.validation_info[0]
+
+
+class TestColMissingOnlyCoded:
+    def test_flags_undocumented_code(self, spec):
+        # -95 is undocumented; reals in [0,120]; -99/-98 documented
+        tbl = pl.DataFrame({"age": [34, -98, 41, -95, 29, -99, 55]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+            .interrogate()
+        )
+        info = _info(v)
+        assert info.n == 7
+        assert info.n_failed == 1  # only -95
+
+    def test_all_documented_or_real_passes(self, spec):
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99, 29]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+            .interrogate()
+        )
+        assert _info(v).n_failed == 0
+
+    def test_allowed_set(self, spec):
+        tbl = pl.DataFrame({"grade": [1, 2, -99, 3, -95, -98]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="grade", missing=spec, allowed=[1, 2, 3])
+            .interrogate()
+        )
+        # -95 is undocumented -> 1 failure
+        assert _info(v).n_failed == 1
+
+    def test_null_documented_when_null_is_missing(self):
+        spec = pb.MissingSpec(reasons={-99: "not_asked"}, null_is_missing=True)
+        tbl = pl.DataFrame({"age": [34, None, -99, 200]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+            .interrogate()
+        )
+        # null passes (documented as unknown), -99 passes, 200 out of range -> fail
+        assert _info(v).n_failed == 1
+
+    def test_null_fails_when_not_missing(self):
+        spec = pb.MissingSpec(reasons={-99: "not_asked"}, null_is_missing=False)
+        tbl = pl.DataFrame({"age": [34, None, -99, 41]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+            .interrogate()
+        )
+        # null is neither documented nor a real value -> fail
+        assert _info(v).n_failed == 1
+
+    def test_requires_a_real_value_constraint(self, spec):
+        tbl = pl.DataFrame({"age": [1, 2, 3]})
+        with pytest.raises(ValueError, match="at least one of"):
+            pb.Validate(data=tbl).col_missing_only_coded(columns="age", missing=spec)
+
+    def test_missing_must_be_spec(self):
+        tbl = pl.DataFrame({"age": [1, 2, 3]})
+        with pytest.raises(TypeError):
+            pb.Validate(data=tbl).col_missing_only_coded(
+                columns="age", missing={-99: "x"}, min_val=0, max_val=10
+            )
+
+    def test_pandas_backend(self, spec):
+        tbl = pd.DataFrame({"age": [34, -98, -95, 200]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+            .interrogate()
+        )
+        # -95 undocumented, 200 out of range -> 2 failures
+        assert _info(v).n_failed == 2
+
+    def test_report_and_step_report(self, spec):
+        tbl = pl.DataFrame({"age": [34, -98, -95, 41]})
+        v = (
+            pb.Validate(data=tbl)
+            .col_missing_only_coded(
+                columns="age", missing=spec, min_val=0, max_val=120, brief=True
+            )
+            .interrogate()
+        )
+        assert v.get_tabular_report() is not None
+        assert v.get_step_report(i=1) is not None
+
+    @pytest.mark.parametrize("lang", ["en", "fr", "de", "ja", "ar", "zh-Hans"])
+    def test_brief_langs(self, spec, lang):
+        tbl = pl.DataFrame({"age": [34, -95]})
+        v = (
+            pb.Validate(data=tbl, lang=lang)
+            .col_missing_only_coded(
+                columns="age", missing=spec, min_val=0, max_val=120, brief=True
+            )
+            .interrogate()
+        )
+        assert _info(v).autobrief

From 4557687a1533e3d3a569a2fd8ca1677fc26c3a0f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:53 -0400
Subject: [PATCH 34/55] Create test_missing_report_integration.py

---
 tests/test_missing_report_integration.py | 100 +++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 tests/test_missing_report_integration.py

diff --git a/tests/test_missing_report_integration.py b/tests/test_missing_report_integration.py
new file mode 100644
index 000000000..33babf327
--- /dev/null
+++ b/tests/test_missing_report_integration.py
@@ -0,0 +1,100 @@
+import polars as pl
+
+import pointblank as pb
+
+
+def test_tabular_report_annotates_missing_aware_steps():
+    tbl = pl.DataFrame({"age": [34, -98, 41, 200]})
+    spec = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+    v = (
+        pb.Validate(data=tbl)
+        .col_vals_between(columns="age", left=0, right=120, missing=spec)
+        .interrogate()
+    )
+    html = v.get_tabular_report().as_raw_html()
+    assert "missing-aware" in html
+    assert "refused" in html and "not_asked" in html
+
+
+def test_tabular_report_no_annotation_without_missing():
+    tbl = pl.DataFrame({"age": [34, -98, 41, 200]})
+    v = (
+        pb.Validate(data=tbl)
+        .col_vals_between(columns="age", left=0, right=120)
+        .interrogate()
+    )
+    assert "missing-aware" not in v.get_tabular_report().as_raw_html()
+
+
+def test_dedicated_methods_show_context():
+    tbl = pl.DataFrame({"age": [34, -98, 41, -99]})
+    spec = pb.MissingSpec(
+        reasons={-99: "not_asked", -98: "refused"},
+        categories={"nonresponse": ["refused"]},
+    )
+    v = (
+        pb.Validate(data=tbl)
+        .col_pct_missing(columns="age", missing=spec, reason="refused", max_pct=0.5)
+        .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+        .interrogate()
+    )
+    html = v.get_tabular_report().as_raw_html()
+    # col_pct_missing shows the reason filter; col_missing_only_coded shows the range
+    assert "reason = refused" in html
+    assert "[0, 120]" in html
+
+
+def test_step_report_shows_missing_codes_legend():
+    spec = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+    # col_vals_* with missing=
+    tbl = pl.DataFrame({"age": [34, -98, 200, -99, 300]})
+    v = (
+        pb.Validate(data=tbl)
+        .col_vals_between(columns="age", left=0, right=120, missing=spec)
+        .interrogate()
+    )
+    h = v.get_step_report(i=1).as_raw_html()
+    assert "Missing codes" in h and "not_asked" in h and "refused" in h
+
+    # col_missing_coded (spec in values)
+    tbl2 = pl.DataFrame({"age": [34, None, 41]})
+    v2 = pb.Validate(data=tbl2).col_missing_coded(columns="age", missing=spec).interrogate()
+    assert "Missing codes" in v2.get_step_report(i=1).as_raw_html()
+
+    # col_missing_only_coded (spec stashed in values dict)
+    tbl3 = pl.DataFrame({"age": [34, -98, -95, 41]})
+    v3 = (
+        pb.Validate(data=tbl3)
+        .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120)
+        .interrogate()
+    )
+    assert "Missing codes" in v3.get_step_report(i=1).as_raw_html()
+
+    # col_missing_consistent
+    tbl4 = pl.DataFrame({"a": [1, -99, -99], "b": [5, -99, 6]})
+    v4 = (
+        pb.Validate(data=tbl4)
+        .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
+        .interrogate()
+    )
+    assert "Missing codes" in v4.get_step_report(i=1).as_raw_html()
+
+
+def test_step_report_no_legend_without_missing():
+    tbl = pl.DataFrame({"age": [34, 200, 41]})
+    v = pb.Validate(data=tbl).col_vals_between(columns="age", left=0, right=120).interrogate()
+    assert "Missing codes" not in v.get_step_report(i=1).as_raw_html()
+
+
+def test_report_renders_with_mixed_steps():
+    tbl = pl.DataFrame({"a": [1, -99, 3], "b": [-99, -99, 3]})
+    spec = pb.MissingSpec(reasons={-99: "not_asked"})
+    v = (
+        pb.Validate(data=tbl)
+        .col_vals_gt(columns="a", value=0, missing=spec)
+        .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
+        .col_missing_coded(columns="a", missing=spec)
+        .interrogate()
+    )
+    assert v.get_tabular_report() is not None

From 1f8558a12a2b5d589d2d76894e174081ee0a77c1 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Tue, 16 Jun 2026 18:33:58 -0400
Subject: [PATCH 35/55] Update test_missing_vals_tbl_structured.py

---
 tests/test_missing_vals_tbl_structured.py | 25 +++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/test_missing_vals_tbl_structured.py b/tests/test_missing_vals_tbl_structured.py
index 30c2f9907..fde123bbe 100644
--- a/tests/test_missing_vals_tbl_structured.py
+++ b/tests/test_missing_vals_tbl_structured.py
@@ -84,3 +84,28 @@ def test_unknown_column_raises(self, tbl_pl):
         spec = pb.MissingSpec(reasons={-99: "not_asked"})
         with pytest.raises(ValueError, match="not found"):
             pb.missing_vals_tbl(tbl_pl, missing={"nonexistent": spec})
+
+
+class TestMissingHeatmap:
+    def test_heatmap_returns_gt(self, tbl_pl, specs):
+        result = pb.missing_vals_tbl(tbl_pl, missing=specs, as_heatmap=True)
+        assert isinstance(result, GT)
+
+    def test_heatmap_title_and_labels(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs, as_heatmap=True).as_raw_html()
+        assert "Missing Pattern Heatmap" in html
+        assert "Refused" in html and "Below Threshold" in html
+        assert "%" in html  # proportions formatted as percentages
+
+    def test_heatmap_pandas(self, specs):
+        tbl = pd.DataFrame(
+            {
+                "age": [34, -98, 41, -99, 29, -98, 55, None],
+                "income": [50000, -99, -1, None, 42000, -99, 38000, 61000],
+            }
+        )
+        assert isinstance(pb.missing_vals_tbl(tbl, missing=specs, as_heatmap=True), GT)
+
+    def test_as_heatmap_ignored_without_missing(self, tbl_pl):
+        # as_heatmap only applies with missing=; default sector view still returned
+        assert isinstance(pb.missing_vals_tbl(tbl_pl, as_heatmap=True), GT)

From 9de4244bcd466d1fa84cc9de16fffb2caed85ca8 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 10:49:31 -0400
Subject: [PATCH 36/55] Use report notes to present MissingSpec details

---
 pointblank/validate.py | 114 +++++++++++++++++++++++++++++++++--------
 1 file changed, 93 insertions(+), 21 deletions(-)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index d3a0b0120..f1b1ae49e 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -10802,6 +10802,7 @@ def col_pct_missing(
                     "max_pct": max_pct,
                     "reason": reason,
                     "category": category,
+                    "spec": missing,
                 },
                 thresholds=thresholds,
                 actions=actions,
@@ -14310,6 +14311,15 @@ def interrogate(
 
             validation.autobrief = autobrief
 
+            # If the step carries structured-missingness context (a `missing=` spec or a dedicated
+            # missing method), attach a one-line note summarizing the codes and any reason/range
+            # filter. This keeps the VALUES cell minimal while surfacing detail in the Notes section.
+            missing_note = _build_missing_note(validation)
+            if missing_note is not None:
+                validation._add_note(
+                    key="missing_spec", markdown=missing_note[0], text=missing_note[1]
+                )
+
             # ------------------------------------------------
             # Bypassing the validation step if conditions met
             # ------------------------------------------------
@@ -17881,18 +17891,16 @@ def get_tabular_report(
                 values_upd.append("&mdash;")
 
             elif assertion_type[i] in ["col_missing_consistent"]:
-                # Show the reason being checked for cross-column consistency
-                values_upd.append(f"when_reason = {value.get('when_reason')}")
+                # Minimal cell: a compact badge (the reason and columns live in the step note)
+                values_upd.append(
+                    "<span style='font-weight: 600; letter-spacing: 0.5px;'>CONSISTENT</span>"
+                )
 
             elif assertion_type[i] in ["col_missing_only_coded"]:
-                # Show the allowed real values and/or range used to define legitimate values
-                parts = []
-                if value.get("allowed") is not None:
-                    allowed_str = str(value["allowed"])[1:-1].replace("'", "")
-                    parts.append(f"allowed = {allowed_str}")
-                if value.get("min_val") is not None or value.get("max_val") is not None:
-                    parts.append(f"[{value.get('min_val')}, {value.get('max_val')}]")
-                values_upd.append("<br/>".join(parts) if parts else "&mdash;")
+                # Minimal cell: a compact badge (allowed values/range live in the step note)
+                values_upd.append(
+                    "<span style='font-weight: 600; letter-spacing: 0.5px;'>ONLY CODED</span>"
+                )
 
             elif assertion_type[i] in ["col_pct_null"]:
                 # Extract p and tol from the values dict for nice formatting
@@ -17904,14 +17912,8 @@ def get_tabular_report(
                 values_upd.append(f"p = {p_value}<br/>tol = {tol_value}")
 
             elif assertion_type[i] in ["col_pct_missing"]:
-                # Format the max_pct and any reason/category filter for display
-                max_pct_value = value["max_pct"]
-                filter_line = ""
-                if value.get("reason") is not None:
-                    filter_line = f"<br/>reason = {value['reason']}"
-                elif value.get("category") is not None:
-                    filter_line = f"<br/>category = {value['category']}"
-                values_upd.append(f"max_pct = {max_pct_value}{filter_line}")
+                # Minimal cell: just the threshold (reason/category detail lives in the step note)
+                values_upd.append(f"&le; {value['max_pct']}")
 
             elif assertion_type[i] in ["data_freshness"]:
                 # Format max_age nicely for display
@@ -18054,10 +18056,10 @@ def get_tabular_report(
         for i, spec in enumerate(missing_specs):
             if spec is None or i >= len(values_upd):
                 continue
-            reasons = ", ".join(spec.reasons_list()) if hasattr(spec, "reasons_list") else ""
+            # Keep the cell minimal: a compact badge. The reason/code detail lives in the step note.
             annotation = (
-                "<br/><span style='font-size: 9px; color: #999999;'>"
-                f"missing-aware: {reasons}</span>"
+                "<br/><span style='font-size: 8px; font-weight: 600; letter-spacing: 0.5px; "
+                "color: #7B68A6;'>MISSING-AWARE</span>"
             )
             values_upd[i] = f"{values_upd[i]}{annotation}"
 
@@ -21107,6 +21109,76 @@ def _apply_segments(data_tbl: Any, segments_expr: tuple[str, str]) -> Any:
     return data_tbl
 
 
+def _resolve_step_missing_spec(validation: Any) -> Any:
+    """Look up the `MissingSpec` tied to a validation step, returning `None` if there is none.
+
+    Checked in order: the step's `missing` attribute (`col_vals_*` steps given `missing=`), a
+    `values` that is itself a `MissingSpec` (`col_missing_coded`), then `values["spec"]`
+    (`col_pct_missing`, `col_missing_only_coded`, and `col_missing_consistent`).
+    """
+    attached = getattr(validation, "missing", None)
+    if attached is not None:
+        return attached
+    values = getattr(validation, "values", None)
+    if isinstance(values, MissingSpec):
+        return values
+    if isinstance(values, dict):
+        candidate = values.get("spec")
+        return candidate if isinstance(candidate, MissingSpec) else None
+    return None
+
+
+def _build_missing_note(validation: Any) -> tuple[str, str] | None:
+    """Build a one-line (markdown, text) note summarizing a step's structured-missingness context.
+
+    Returns `None` when the step has no associated `MissingSpec`; otherwise the pair holds the
+    same summary as markdown and as plain text, ending with any method-specific filter detail.
+    """
+    spec = _resolve_step_missing_spec(validation)
+    if spec is None or not hasattr(spec, "reasons"):
+        return None
+
+    # Join once at the end so an empty `reasons` mapping cannot leave a stray leading ", "
+    codes_md = [f"`{value}`&rarr;{reason}" for value, reason in spec.reasons.items()]
+    codes_tx = [f"{value}->{reason}" for value, reason in spec.reasons.items()]
+    if getattr(spec, "null_is_missing", False):
+        codes_md.append(f"`null`&rarr;{spec.null_reason}")
+        codes_tx.append(f"null->{spec.null_reason}")
+
+    md = f"**Missing codes:** {', '.join(codes_md)}"
+    tx = f"Missing codes: {', '.join(codes_tx)}"
+
+    # Method-specific context appended to the one-line summary
+    assertion_type = getattr(validation, "assertion_type", None)
+    vals = getattr(validation, "values", None)
+
+    if assertion_type == "col_pct_missing" and isinstance(vals, dict):
+        if vals.get("reason") is not None:
+            md += f". Counting reason `{vals['reason']}`"
+            tx += f". Counting reason {vals['reason']}"
+        elif vals.get("category") is not None:
+            md += f". Counting category `{vals['category']}`"
+            tx += f". Counting category {vals['category']}"
+    elif assertion_type == "col_missing_only_coded" and isinstance(vals, dict):
+        # These fragments read the same in markdown and plain text, so they are built once
+        bits = []
+        if vals.get("allowed") is not None:
+            allowed_str = ", ".join(str(a) for a in vals["allowed"])
+            bits.append(f"allowed {{{allowed_str}}}")
+        if vals.get("min_val") is not None or vals.get("max_val") is not None:
+            bits.append(f"range [{vals.get('min_val')}, {vals.get('max_val')}]")
+        if bits:
+            legit = f". Legitimate values: {', '.join(bits)}"
+            md += legit
+            tx += legit
+    elif assertion_type == "col_missing_consistent" and isinstance(vals, dict):
+        if vals.get("when_reason") is not None:
+            md += f". Consistency required for reason `{vals['when_reason']}`"
+            tx += f". Consistency required for reason {vals['when_reason']}"
+
+    return md, tx
+
+
 def _missing_legend_html(spec: Any) -> str:
     """Build an HTML legend of a MissingSpec's sentinel codes and their reasons, for step reports."""
     if not hasattr(spec, "reasons"):

From 17b0ea8b463320e54103c6a92a018be172fe8ece Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 10:49:35 -0400
Subject: [PATCH 37/55] Update test_missing_report_integration.py

---
 tests/test_missing_report_integration.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/tests/test_missing_report_integration.py b/tests/test_missing_report_integration.py
index 33babf327..edd08cda2 100644
--- a/tests/test_missing_report_integration.py
+++ b/tests/test_missing_report_integration.py
@@ -12,7 +12,9 @@ def test_tabular_report_annotates_missing_aware_steps():
         .interrogate()
     )
     html = v.get_tabular_report().as_raw_html()
-    assert "missing-aware" in html
+    # The VALUES cell carries a compact badge; the reason/code detail goes to the step note
+    assert "MISSING-AWARE" in html
+    assert "Missing codes" in html
     assert "refused" in html and "not_asked" in html
 
 
@@ -23,10 +25,12 @@ def test_tabular_report_no_annotation_without_missing():
         .col_vals_between(columns="age", left=0, right=120)
         .interrogate()
     )
-    assert "missing-aware" not in v.get_tabular_report().as_raw_html()
+    html = v.get_tabular_report().as_raw_html()
+    assert "MISSING-AWARE" not in html
+    assert "Missing codes" not in html
 
 
-def test_dedicated_methods_show_context():
+def test_dedicated_methods_show_minimal_cell_and_note():
     tbl = pl.DataFrame({"age": [34, -98, 41, -99]})
     spec = pb.MissingSpec(
         reasons={-99: "not_asked", -98: "refused"},
@@ -39,9 +43,15 @@ def test_dedicated_methods_show_context():
         .interrogate()
     )
     html = v.get_tabular_report().as_raw_html()
-    # col_pct_missing shows the reason filter; col_missing_only_coded shows the range
-    assert "reason = refused" in html
-    assert "[0, 120]" in html
+    # Compact VALUES cells: a threshold for col_pct_missing and an "ONLY CODED" badge
+    assert "ONLY CODED" in html
+    # Detail is surfaced via the auto Notes system
+    assert "Missing codes" in html
+    assert "Counting reason" in html and "refused" in html
+    assert "Legitimate values" in html and "[0, 120]" in html
+    # The old verbose VALUES strings should no longer be present
+    assert "reason = refused" not in html
+    assert "max_pct = " not in html
 
 
 def test_step_report_shows_missing_codes_legend():

From 41ede89d02474dde81ebfa22f8f62ea2a3efa10f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 11:36:47 -0400
Subject: [PATCH 38/55] Update missing.py

---
 pointblank/missing.py | 256 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 256 insertions(+)

diff --git a/pointblank/missing.py b/pointblank/missing.py
index 04f3150a6..de533d560 100644
--- a/pointblank/missing.py
+++ b/pointblank/missing.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from dataclasses import dataclass, field
 from typing import Any
 
@@ -8,6 +9,31 @@
 ]
 
 
+# Standard HL7/CDISC null flavors mapped to snake_case reason labels
+_CDISC_NULL_FLAVORS: dict[str, str] = {
+    "NI": "no_information",
+    "NA": "not_applicable",
+    "UNK": "unknown",
+    "ASKU": "asked_but_unknown",
+    "NAV": "temporarily_unavailable",
+    "NASK": "not_asked",
+    "OTH": "other",
+    "PINF": "positive_infinity",
+    "NINF": "negative_infinity",
+    "MSK": "masked",
+    "DER": "derived",
+    "QS": "sufficient_quantity",
+    "TRC": "trace",
+    "NP": "not_present",
+}
+
+
+def _slugify(label: Any) -> str:
+    """Turn a free-form label into a snake_case reason name (`"missing"` if nothing survives)."""
+    words = re.findall(r"[0-9a-zA-Z]+", str(label).lower())
+    return "_".join(words) if words else "missing"
+
+
 @dataclass
 class MissingSpec:
     """
@@ -240,3 +266,233 @@ def reasons_list(self) -> list[str]:
         if self.null_is_missing:
             seen.setdefault(self.null_reason, None)
         return list(seen.keys())
+
+    # ------------------------------------------------------------------
+    # Factory methods (pre-built specs and metadata-import integration)
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def from_cdisc_null_flavors(
+        cls,
+        null_is_missing: bool = True,
+        null_reason: str = "no_information",
+        description: str | None = "CDISC/HL7 null flavors",
+    ) -> "MissingSpec":
+        """Create a `MissingSpec` for the standard HL7/CDISC *null flavors*.
+
+        Clinical data uses standardized null flavor codes to record *why* a value is absent (e.g.,
+        `"NASK"` for "not asked", `"UNK"` for "unknown"). This returns a ready-to-use spec mapping
+        those codes to reason labels.
+
+        Parameters
+        ----------
+        null_is_missing
+            Whether actual null values should also be treated as missing. Default is `True`.
+        null_reason
+            The reason label for actual null values. Default is `"no_information"`.
+        description
+            Optional description. Default identifies the spec as CDISC/HL7 null flavors.
+
+        Returns
+        -------
+        MissingSpec
+            A spec with the standard null flavor codes.
+
+        Examples
+        --------
+        ```python
+        import pointblank as pb
+
+        cdisc_missing = pb.MissingSpec.from_cdisc_null_flavors()
+        cdisc_missing.reason_for("NASK")   # "not_asked"
+        ```
+        """
+        reasons = dict(_CDISC_NULL_FLAVORS)
+        categories = {
+            "unknown": ["no_information", "unknown", "asked_but_unknown", "temporarily_unavailable"],
+            "not_applicable": ["not_applicable", "not_asked", "not_present"],
+            "boundary": ["positive_infinity", "negative_infinity"],
+        }
+        return cls(
+            reasons=reasons,
+            categories=categories,
+            null_is_missing=null_is_missing,
+            null_reason=null_reason,
+            description=description,
+        )
+
+    # Convenient short alias
+    @classmethod
+    def from_cdisc(cls, **kwargs: Any) -> "MissingSpec":
+        """Alias for [`from_cdisc_null_flavors()`](`pointblank.MissingSpec.from_cdisc_null_flavors`)."""
+        return cls.from_cdisc_null_flavors(**kwargs)
+
+    @classmethod
+    def from_sas(
+        cls,
+        reasons: dict[str, str] | None = None,
+        include_underscore: bool = True,
+        null_is_missing: bool = True,
+        null_reason: str = "system_missing",
+        description: str | None = "SAS special missing values",
+    ) -> "MissingSpec":
+        """Create a `MissingSpec` for SAS special missing values.
+
+        SAS encodes missingness with `"."` (system missing), `"._"`, and `".A"` through `".Z"` (27
+        user-defined missing codes). This returns a spec covering all of them; you can override the
+        reason label for any specific code via `reasons=`.
+
+        Parameters
+        ----------
+        reasons
+            Optional mapping of specific SAS missing codes to custom reason labels (e.g.,
+            `{".A": "not_applicable", ".B": "below_detection"}`). These override the defaults.
+        include_underscore
+            Whether to include the `"._"` special missing code. Default is `True`.
+        null_is_missing
+            Whether actual null values should also be treated as missing. Default is `True`.
+        null_reason
+            The reason label for actual null values. Default is `"system_missing"`.
+        description
+            Optional description. Default identifies the spec as SAS special missing values.
+
+        Returns
+        -------
+        MissingSpec
+            A spec covering the SAS special missing values.
+
+        Examples
+        --------
+        ```python
+        import pointblank as pb
+
+        sas_missing = pb.MissingSpec.from_sas(
+            reasons={".A": "not_applicable", ".B": "below_detection"}
+        )
+        sas_missing.reason_for(".A")   # "not_applicable"
+        sas_missing.reason_for(".C")   # "user_missing_c"
+        ```
+        """
+        built: dict[Any, str] = {".": "system_missing"}
+        if include_underscore:
+            built["._"] = "system_missing"
+        for letter in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
+            built[f".{letter}"] = f"user_missing_{letter.lower()}"
+        if reasons:
+            for code, label in reasons.items():
+                built[code] = label
+        return cls(
+            reasons=built,
+            null_is_missing=null_is_missing,
+            null_reason=null_reason,
+            description=description,
+        )
+
+    @classmethod
+    def from_spss(
+        cls,
+        missing_values: list,
+        labels: dict[Any, str] | None = None,
+        null_is_missing: bool = True,
+        null_reason: str = "unknown",
+        description: str | None = "SPSS user-defined missing values",
+    ) -> "MissingSpec":
+        """Create a `MissingSpec` from SPSS-style user-defined missing values.
+
+        SPSS supports up to 3 user-defined missing values per variable (plus a range). Pass the
+        missing values (and optionally their value labels) to build a spec. Reason labels are
+        derived from the labels when available, otherwise a `"missing_<value>"` placeholder is used.
+
+        Parameters
+        ----------
+        missing_values
+            The sentinel values that SPSS marks as missing for the variable (e.g., `[-99, -98]`).
+        labels
+            Optional mapping of sentinel value to human-readable label (e.g., `{-99: "Refused"}`).
+            Labels are slugified into reason identifiers (e.g., `"Refused"` -> `"refused"`).
+        null_is_missing
+            Whether actual null values should also be treated as missing. Default is `True`.
+        null_reason
+            The reason label for actual null values. Default is `"unknown"`.
+        description
+            Optional description. Default identifies the spec as SPSS user-defined missing values.
+
+        Returns
+        -------
+        MissingSpec
+            A spec built from the SPSS missing values.
+
+        Examples
+        --------
+        ```python
+        import pointblank as pb
+
+        spss_missing = pb.MissingSpec.from_spss(
+            missing_values=[-99, -98],
+            labels={-99: "Not asked", -98: "Refused"},
+        )
+        spss_missing.reason_for(-98)   # "refused"
+        ```
+        """
+        labels = labels or {}
+        reasons = {
+            value: (_slugify(labels[value]) if value in labels else f"missing_{_slugify(value)}")
+            for value in missing_values
+        }
+        return cls(
+            reasons=reasons,
+            null_is_missing=null_is_missing,
+            null_reason=null_reason,
+            description=description,
+        )
+
+    @classmethod
+    def from_variable_metadata(
+        cls,
+        variable: Any,
+        null_is_missing: bool = True,
+        null_reason: str = "unknown",
+    ) -> "MissingSpec | None":
+        """Create a `MissingSpec` from an imported variable's metadata.
+
+        This works with a [`VariableMetadata`](`pointblank.VariableMetadata`) object (as produced by
+        [`import_metadata()`](`pointblank.import_metadata`) for SPSS, Stata, and SAS files). It reads
+        the variable's `missing_values` and derives reason labels from `missing_value_labels` or
+        `value_labels` when available.
+
+        Parameters
+        ----------
+        variable
+            A variable-metadata object exposing `missing_values` and (optionally)
+            `missing_value_labels` / `value_labels` attributes.
+        null_is_missing
+            Whether actual null values should also be treated as missing. Default is `True`.
+        null_reason
+            The reason label for actual null values. Default is `"unknown"`.
+
+        Returns
+        -------
+        MissingSpec | None
+            A spec built from the variable's missing values, or `None` if the variable declares no
+            missing values.
+        """
+        missing_values = getattr(variable, "missing_values", None) or []
+        if not missing_values:
+            return None
+
+        labels = getattr(variable, "missing_value_labels", None) or {}
+        value_labels = getattr(variable, "value_labels", None) or {}
+
+        reasons: dict[Any, str] = {}
+        for value in missing_values:
+            label = labels.get(value)
+            if label is None:
+                label = value_labels.get(value)
+            reasons[value] = _slugify(label) if label else f"missing_{_slugify(value)}"
+
+        return cls(
+            reasons=reasons,
+            null_is_missing=null_is_missing,
+            null_reason=null_reason,
+            description=f"Imported missing values for '{getattr(variable, 'name', 'variable')}'",
+        )

From e9bfabe6280fc0e1421c8d7dccac88f5eb0cb3bd Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 11:36:55 -0400
Subject: [PATCH 39/55] Update _types.py

---
 pointblank/metadata/_types.py | 48 +++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/pointblank/metadata/_types.py b/pointblank/metadata/_types.py
index 04e6b83b1..c70e9a78d 100644
--- a/pointblank/metadata/_types.py
+++ b/pointblank/metadata/_types.py
@@ -223,6 +223,21 @@ class VariableMetadata:
     unit: str | None = None
     unit_system: str | None = None
 
+    def to_missing_spec(self) -> Any:
+        """Build a [`MissingSpec`](`pointblank.MissingSpec`) from this variable's missing values.
+
+        Reads `missing_values` and derives reason labels from `missing_value_labels` or
+        `value_labels` when available.
+
+        Returns
+        -------
+        MissingSpec | None
+            A `MissingSpec` for the variable, or `None` if no missing values are declared.
+        """
+        from pointblank.missing import MissingSpec
+
+        return MissingSpec.from_variable_metadata(self)
+
 
 @dataclass
 class MetadataImport:
@@ -340,6 +355,39 @@ def get_variable(self, name: str) -> VariableMetadata:
                 return var
         raise KeyError(f"No variable named '{name}' in imported metadata")
 
+    def missing_specs(self) -> dict[str, Any]:
+        """Auto-generate [`MissingSpec`](`pointblank.MissingSpec`) objects for all variables.
+
+        Builds a mapping of column name to `MissingSpec` for every imported variable that declares
+        missing values (e.g., SPSS user-defined missing values, SAS special missing). Each
+        spec can be passed to a validation method (via `missing=`), and the whole mapping to
+        [`missing_vals_tbl()`](`pointblank.missing_vals_tbl`).
+
+        Returns
+        -------
+        dict[str, MissingSpec]
+            A mapping of column name to `MissingSpec`. Variables without declared missing values
+            are omitted.
+
+        Examples
+        --------
+        ```python
+        import pointblank as pb
+
+        meta = pb.import_metadata("survey.sav", format="spss")
+        specs = meta.missing_specs()
+
+        # Use the auto-generated specs in a missingness report
+        pb.missing_vals_tbl(data, missing=specs)
+        ```
+        """
+        specs: dict[str, Any] = {}
+        for var in self.variables:
+            spec = var.to_missing_spec()
+            if spec is not None:
+                specs[var.name] = spec
+        return specs
+
     def get_codelist(self, name: str) -> Codelist:
         """Get a specific codelist by name.
 

From 5aa2d1c027d5093ad71383e9f3fef573bdc9fb0b Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 11:36:57 -0400
Subject: [PATCH 40/55] Create test_missing_factories.py

---
 tests/test_missing_factories.py | 145 ++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 tests/test_missing_factories.py

diff --git a/tests/test_missing_factories.py b/tests/test_missing_factories.py
new file mode 100644
index 000000000..7cdeb0610
--- /dev/null
+++ b/tests/test_missing_factories.py
@@ -0,0 +1,145 @@
+import pytest
+
+import pointblank as pb
+from pointblank.missing import MissingSpec, _slugify
+from pointblank.metadata import VariableMetadata, MetadataImport
+
+
+class TestSlugify:
+    @pytest.mark.parametrize(
+        "label,expected",
+        [
+            ("Refused", "refused"),
+            ("Not Applicable", "not_applicable"),
+            ("DON'T KNOW", "don_t_know"),
+            ("  spaced  ", "spaced"),
+            (-99, "99"),
+            ("", "missing"),
+        ],
+    )
+    def test_slugify(self, label, expected):
+        assert _slugify(label) == expected
+
+
+class TestFromCdisc:
+    def test_standard_codes(self):
+        spec = MissingSpec.from_cdisc_null_flavors()
+        assert spec.reason_for("NASK") == "not_asked"
+        assert spec.reason_for("UNK") == "unknown"
+        assert spec.reason_for("PINF") == "positive_infinity"
+        assert spec.reason_for("NA") == "not_applicable"
+
+    def test_categories(self):
+        spec = MissingSpec.from_cdisc_null_flavors()
+        assert set(spec.values_for_category("boundary")) == {"PINF", "NINF"}
+        assert "NASK" in spec.values_for_category("not_applicable")
+
+    def test_alias(self):
+        assert MissingSpec.from_cdisc().reason_for("MSK") == "masked"
+
+    def test_null_handling(self):
+        spec = MissingSpec.from_cdisc_null_flavors()
+        assert spec.null_is_missing is True
+        assert spec.reason_for(None) == "no_information"
+
+    def test_exported_via_top_level(self):
+        assert pb.MissingSpec.from_cdisc_null_flavors().reason_for("NI") == "no_information"
+
+
+class TestFromSas:
+    def test_defaults(self):
+        spec = MissingSpec.from_sas()
+        assert spec.reason_for(".") == "system_missing"
+        assert spec.reason_for(".A") == "user_missing_a"
+        assert spec.reason_for(".Z") == "user_missing_z"
+        assert spec.reason_for("._") == "system_missing"
+
+    def test_overrides(self):
+        spec = MissingSpec.from_sas(reasons={".A": "not_applicable", ".B": "below_detection"})
+        assert spec.reason_for(".A") == "not_applicable"
+        assert spec.reason_for(".B") == "below_detection"
+        assert spec.reason_for(".C") == "user_missing_c"  # default preserved
+
+    def test_no_underscore(self):
+        spec = MissingSpec.from_sas(include_underscore=False)
+        assert spec.reason_for("._") is None
+        # 26 letters + "." = 27 sentinels
+        assert len(spec.sentinel_values()) == 27
+
+
+class TestFromSpss:
+    def test_with_labels(self):
+        spec = MissingSpec.from_spss(
+            missing_values=[-99, -98], labels={-99: "Not asked", -98: "Refused"}
+        )
+        assert spec.reason_for(-99) == "not_asked"
+        assert spec.reason_for(-98) == "refused"
+
+    def test_without_labels(self):
+        spec = MissingSpec.from_spss(missing_values=[-99, -1])
+        assert spec.reason_for(-99) == "missing_99"
+        assert spec.reason_for(-1) == "missing_1"
+
+
+class TestFromVariableMetadata:
+    def test_uses_missing_value_labels(self):
+        var = VariableMetadata(
+            name="age",
+            dtype="Int64",
+            missing_values=[-99, -98],
+            missing_value_labels={-99: "Not asked", -98: "Refused"},
+        )
+        spec = MissingSpec.from_variable_metadata(var)
+        assert spec.reason_for(-98) == "refused"
+
+    def test_falls_back_to_value_labels(self):
+        var = VariableMetadata(
+            name="age",
+            dtype="Int64",
+            missing_values=[-99],
+            value_labels={-99: "Not Asked", 1: "Yes"},
+        )
+        spec = MissingSpec.from_variable_metadata(var)
+        assert spec.reason_for(-99) == "not_asked"
+
+    def test_no_missing_returns_none(self):
+        var = VariableMetadata(name="id", dtype="Int64")
+        assert MissingSpec.from_variable_metadata(var) is None
+
+    def test_to_missing_spec_method(self):
+        var = VariableMetadata(name="age", dtype="Int64", missing_values=[-99])
+        assert var.to_missing_spec().is_missing(-99) is True
+
+
+class TestMetadataImportMissingSpecs:
+    def test_missing_specs_mapping(self):
+        v1 = VariableMetadata(
+            name="age",
+            dtype="Int64",
+            missing_values=[-99, -98],
+            missing_value_labels={-99: "Not asked", -98: "Refused"},
+        )
+        v2 = VariableMetadata(name="id", dtype="Int64")  # no missing values
+        meta = MetadataImport(source_format="spss", variables=[v1, v2])
+
+        specs = meta.missing_specs()
+        assert list(specs.keys()) == ["age"]  # id omitted (no missing values)
+        assert specs["age"].reason_for(-99) == "not_asked"
+
+    def test_specs_usable_in_validation(self):
+        import polars as pl
+
+        v = VariableMetadata(
+            name="age", dtype="Int64", missing_values=[-99], missing_value_labels={-99: "Not asked"}
+        )
+        meta = MetadataImport(source_format="spss", variables=[v])
+        specs = meta.missing_specs()
+
+        tbl = pl.DataFrame({"age": [34, -99, 200]})
+        validation = (
+            pb.Validate(data=tbl)
+            .col_vals_between(columns="age", left=0, right=120, missing=specs["age"])
+            .interrogate()
+        )
+        # -99 excluded; only 200 fails
+        assert validation.validation_info[0].n_failed == 1

From ba0daf21a7057bf4b5b0ee82e3c0b0aea94c552d Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:46:29 -0400
Subject: [PATCH 41/55] Add missing sections in existing docstrings

---
 pointblank/validate.py | 160 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index f1b1ae49e..4d3f9464e 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -10711,6 +10711,34 @@ def col_pct_missing(
         Validate
             The `Validate` object with the added validation step.
 
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or as the absolute number of failing test units (as an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+        thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+        the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+        'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+        for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+        set, you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+
         Examples
         --------
         ```{python}
@@ -10879,6 +10907,50 @@ def col_missing_coded(
         Validate
             The `Validate` object with the added validation step.
 
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+        table during interrogation. This function should take a table as input and return a modified
+        table. This is useful for performing any necessary transformations or filtering on the data
+        before the validation step is applied.
+
+        Segmentation
+        ------------
+        The `segments=` argument allows for the segmentation of a validation step into multiple
+        segments. This is useful for applying the same validation step to different subsets of the
+        data. The segmentation can be done based on a single column or specific fields within a
+        column. Providing a single column name results in a separate validation step for each unique
+        value in that column; a tuple of `(column, values)` restricts segmentation to the listed
+        values. The segmentation is performed after any `pre=` preprocessing.
+
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or as the absolute number of failing test units (as an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+        thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+        the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+        'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+        for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+        set, you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+
         Examples
         --------
         ```{python}
@@ -11025,6 +11097,50 @@ def col_missing_only_coded(
         Validate
             The `Validate` object with the added validation step.
 
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+        table during interrogation. This function should take a table as input and return a modified
+        table. This is useful for performing any necessary transformations or filtering on the data
+        before the validation step is applied.
+
+        Segmentation
+        ------------
+        The `segments=` argument allows for the segmentation of a validation step into multiple
+        segments. This is useful for applying the same validation step to different subsets of the
+        data. The segmentation can be done based on a single column or specific fields within a
+        column. Providing a single column name results in a separate validation step for each unique
+        value in that column; a tuple of `(column, values)` restricts segmentation to the listed
+        values. The segmentation is performed after any `pre=` preprocessing.
+
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or as the absolute number of failing test units (as an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+        thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+        the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+        'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+        for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+        set, you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+
         Examples
         --------
         ```{python}
@@ -11674,6 +11790,50 @@ def col_missing_consistent(
         Validate
             The `Validate` object with the added validation step.
 
+        Preprocessing
+        -------------
+        The `pre=` argument allows for a preprocessing function or lambda to be applied to the data
+        table during interrogation. This function should take a table as input and return a modified
+        table. This is useful for performing any necessary transformations or filtering on the data
+        before the validation step is applied.
+
+        Segmentation
+        ------------
+        The `segments=` argument allows for the segmentation of a validation step into multiple
+        segments. This is useful for applying the same validation step to different subsets of the
+        data. The segmentation can be done based on a single column or specific fields within a
+        column. Providing a single column name results in a separate validation step for each unique
+        value in that column; a tuple of `(column, values)` restricts segmentation to the listed
+        values. The segmentation is performed after any `pre=` preprocessing.
+
+        Thresholds
+        ----------
+        The `thresholds=` parameter is used to set the failure-condition levels for the validation
+        step. If they are set here at the step level, these thresholds will override any thresholds
+        set at the global level in `Validate(thresholds=...)`.
+
+        There are three threshold levels: 'warning', 'error', and 'critical'. The threshold values
+        can either be set as a proportion failing of all test units (a value between `0` and `1`),
+        or as the absolute number of failing test units (as an integer that's `1` or greater).
+
+        Thresholds can be defined using one of these input schemes:
+
+        1. use the [`Thresholds`](`pointblank.Thresholds`) class (the most direct way to create
+        thresholds)
+        2. provide a tuple of 1-3 values, where position `0` is the 'warning' level, position `1` is
+        the 'error' level, and position `2` is the 'critical' level
+        3. create a dictionary of 1-3 value entries; the valid keys are 'warning', 'error', and
+        'critical'
+        4. a single integer/float value denoting absolute number or fraction of failing test units
+        for the 'warning' level only
+
+        If the number of failing test units exceeds set thresholds, the validation step will be
+        marked as 'warning', 'error', or 'critical'. All of the threshold levels don't need to be
+        set, you're free to set any combination of them.
+
+        Aside from reporting failure conditions, thresholds can be used to determine the actions to
+        take for each level of failure (using the `actions=` parameter).
+
         Examples
         --------
         ```{python}

From 273999cc9167edfc23b8aa465a6ca7c4d62e8ecf Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:46:48 -0400
Subject: [PATCH 42/55] Add objects to reference: section

---
 great-docs.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/great-docs.yml b/great-docs.yml
index b1e6377e7..4f445c591 100644
--- a/great-docs.yml
+++ b/great-docs.yml
@@ -131,6 +131,8 @@ reference:
           members: false
         - name: DraftValidation
           members: false
+        - name: MissingSpec
+          members: true
 
     - title: Contracts and Pipelines
       desc: >
@@ -189,6 +191,10 @@ reference:
         - Validate.col_vals_expr
         - Validate.col_exists
         - Validate.col_pct_null
+        - Validate.col_pct_missing
+        - Validate.col_missing_coded
+        - Validate.col_missing_only_coded
+        - Validate.col_missing_consistent
         - Validate.rows_distinct
         - Validate.rows_complete
         - Validate.col_schema_match

From e46039555769dab63ff76c8e593dc8d46a82b197 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:46:50 -0400
Subject: [PATCH 43/55] Update 02-validation-methods.qmd

---
 .../02-validation-methods.qmd                 | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/user_guide/01-validation-plan/02-validation-methods.qmd b/user_guide/01-validation-plan/02-validation-methods.qmd
index 2cc85e623..c420961c1 100644
--- a/user_guide/01-validation-plan/02-validation-methods.qmd
+++ b/user_guide/01-validation-plan/02-validation-methods.qmd
@@ -293,6 +293,78 @@ In summary, `na_pass=` works like this:
 - `na_pass=True`: missing values pass validation regardless of the condition being tested
 - `na_pass=False` (the default): missing values fail validation
 
+### Structured Missingness with `missing=`
+
+`na_pass=` treats missingness as binary, but real-world data often encodes *why* a value is absent
+using sentinel codes (e.g., `-99` for "not asked", `-98` for "refused"). The
+[`MissingSpec`](`pointblank.MissingSpec`) class captures these codes and their reasons, and most
+validation methods accept a `missing=` argument that uses it.
+
+When you pass `missing=` to a `col_vals_*()` method, declared sentinel values (and, by default,
+`Null` values) are *excluded* from the check, so only the "real" values are validated:
+
+```{python}
+import polars as pl
+
+tbl = pl.DataFrame({"age": [34, -98, 41, -99, 29, 200, 55, None]})
+
+age_missing = pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})
+
+validation = (
+    pb.Validate(data=tbl)
+    .col_vals_between(columns="age", left=0, right=120, missing=age_missing)
+    .interrogate()
+)
+
+validation
+```
+
+Only the real value `200` is out of range; the sentinel codes and the `Null` are excluded and pass.
+In the report, such steps are marked with a compact `MISSING-AWARE` badge, and a one-line summary of
+the codes appears in the step's notes.
+
+Pointblank also provides dedicated missingness validation methods that use a `MissingSpec`:
+
+- [`Validate.col_pct_missing()`](`Validate.col_pct_missing`): assert the percentage of missing
+  values stays within a limit, optionally filtered by a specific `reason=` or `category=`.
+- [`Validate.col_missing_coded()`](`Validate.col_missing_coded`): assert every absence is expressed
+  as a documented code (no uncoded raw `Null` values).
+- [`Validate.col_missing_only_coded()`](`Validate.col_missing_only_coded`): assert a column contains
+  only documented codes and legitimate values (catching undocumented codes like a stray `-95`),
+  paired with an `allowed=` set or a `min_val`/`max_val` range.
+- [`Validate.col_missing_consistent()`](`Validate.col_missing_consistent`): assert related columns
+  share a consistent missingness pattern for a given reason (e.g., a survey skip pattern).
+
+```{python}
+income_missing = pb.MissingSpec(reasons={-99: "not_asked", -1: "below_threshold"})
+
+survey = pl.DataFrame(
+    {
+        "income_source": [1, -99, 2, -99],
+        "income_amount": [50000, -99, 42000, 38000],
+    }
+)
+
+validation = (
+    pb.Validate(data=survey)
+    # No more than 30% of income values may be "not_asked"
+    .col_pct_missing(columns="income_amount", missing=income_missing, reason="not_asked", max_pct=0.30)
+    # If income wasn't asked, both related columns should be coded together
+    .col_missing_consistent(
+        columns=["income_source", "income_amount"], missing=income_missing, when_reason="not_asked"
+    )
+    .interrogate()
+)
+
+validation
+```
+
+`MissingSpec` also offers pre-built factories for common standards (e.g.,
+`pb.MissingSpec.from_cdisc_null_flavors()`, `pb.MissingSpec.from_sas()`,
+`pb.MissingSpec.from_spss()`), and importing metadata from SPSS/Stata/SAS files can auto-generate
+specs via [`MetadataImport.missing_specs()`](`pointblank.MetadataImport`). For a fuller treatment of
+structured-missingness *reporting*, see the *Missing Values Reporting* article.
+
 ## 2. Row-based Validations
 
 Row-based validations focus on examining properties that span across entire rows rather than

From 9e0e1f32f6af82a7df120ac960fe332b9fa2339f Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:46:52 -0400
Subject: [PATCH 44/55] Update 02-yaml-reference.qmd

---
 user_guide/03-yaml/02-yaml-reference.qmd | 61 ++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/user_guide/03-yaml/02-yaml-reference.qmd b/user_guide/03-yaml/02-yaml-reference.qmd
index cbe3341b2..a13408beb 100644
--- a/user_guide/03-yaml/02-yaml-reference.qmd
+++ b/user_guide/03-yaml/02-yaml-reference.qmd
@@ -40,6 +40,11 @@ actions:                               # OPTIONAL: Global failure actions
 final_actions:                         # OPTIONAL: Actions triggered after all steps complete
   warning: "Post-validation warning"
   error: "Post-validation error"
+missing_specs:                         # OPTIONAL: Named structured-missingness specs
+  standard_survey:
+    reasons:
+      -99: not_asked
+      -98: refused
 steps:                                 # REQUIRED: List of validation steps
   - validation_method_name
   - validation_method_name:
@@ -191,6 +196,62 @@ Template variables available for action strings:
 - `{level}`: severity level ('warning'/'error'/'critical')
 - `{time}`: timestamp of validation
 
+### Structured Missingness (`missing_specs`)
+
+The optional top-level `missing_specs` key defines named [`MissingSpec`](`pointblank.MissingSpec`)
+objects that steps can reference. Each named spec maps sentinel values to reason labels, and may
+declare `categories`, `null_is_missing`, and `null_reason`:
+
+```yaml
+missing_specs:
+  standard_survey:
+    reasons:
+      -99: not_asked
+      -98: refused
+      -97: dont_know
+    categories:
+      nonresponse: [refused, dont_know]
+    null_is_missing: true        # OPTIONAL (default true)
+    null_reason: unknown         # OPTIONAL (default "unknown")
+```
+
+Steps reference a named spec by name through the `missing:` parameter. This works both on the
+`col_vals_*` methods (to exclude sentinel values from a check) and on the dedicated missingness
+methods (`col_pct_missing`, `col_missing_coded`, `col_missing_only_coded`, `col_missing_consistent`):
+
+```yaml
+missing_specs:
+  standard_survey:
+    reasons:
+      -99: not_asked
+      -98: refused
+
+steps:
+  - col_vals_between:
+      columns: age
+      left: 0
+      right: 120
+      missing: standard_survey       # excludes -99/-98 (and nulls) from the range check
+  - col_pct_missing:
+      columns: age
+      missing: standard_survey
+      reason: refused
+      max_pct: 0.30
+```
+
+A step can also define a spec inline (an anonymous mapping) instead of referencing a named one:
+
+```yaml
+steps:
+  - col_pct_missing:
+      columns: age
+      max_pct: 0.5
+      missing:
+        reasons:
+          -99: not_asked
+          -98: refused
+```
+
 ## Validation Methods Reference
 
 ### Column Value Validations

From b9f1e7a00eb542246cbc288fa5553fbf204b3d68 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:47:05 -0400
Subject: [PATCH 45/55] Update 03-missing-vals-tbl.qmd

---
 .../03-missing-vals-tbl.qmd                   | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/user_guide/05-data-inspection/03-missing-vals-tbl.qmd b/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
index 951392099..a4fafbf76 100644
--- a/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
+++ b/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
@@ -81,3 +81,72 @@ pb.missing_vals_tbl(game_revenue)
 We see nothing but light blue in this report! The header also indicates that there are no missing
 values by displaying a large green check mark (the other report tables provided a count of total
 missing values across all columns).
+
+## Structured Missingness by Reason
+
+So far we've treated missingness as binary: a value is either `Null` or it isn't. But real-world
+data often encodes *why* a value is absent. Survey data distinguishes *refused* from *not asked*
+from *don't know*; clinical and statistical-package data use sentinel codes like `-99`, `".A"`, or
+`"NOT DONE"`. Pointblank captures this with the [`MissingSpec`](`pointblank.MissingSpec`) class,
+which maps sentinel values to human-readable *reasons*.
+
+When you pass a `missing=` mapping of column names to `MissingSpec` objects, `missing_vals_tbl()`
+switches from the sector heatmap to a *structured breakdown*: one row per column with the count and
+percentage of complete values and of each missing reason.
+
+```{python}
+import polars as pl
+
+survey = pl.DataFrame(
+    {
+        "age": [34, -98, 41, -99, 29, -98, 55, None],
+        "income": [50000, -99, -1, None, 42000, -99, 38000, 61000],
+    }
+)
+
+specs = {
+    "age": pb.MissingSpec(reasons={-99: "not_asked", -98: "refused", -97: "dont_know"}),
+    "income": pb.MissingSpec(reasons={-99: "not_asked", -1: "below_threshold"}),
+}
+
+pb.missing_vals_tbl(survey, missing=specs)
+```
+
+Each `MissingSpec` declares the sentinel values for a column and the reason each one represents. By
+default, actual `Null` values are also counted as missing (with the reason `"unknown"`); set
+`null_is_missing=False` on the spec if raw nulls should be treated as real values instead.
+
+### Viewing the pattern as a heatmap
+
+For a more visual read of *where* missingness concentrates, pass `as_heatmap=True`. The reason
+columns are then shaded from light to dark by the proportion missing:
+
+```{python}
+pb.missing_vals_tbl(survey, missing=specs, as_heatmap=True)
+```
+
+### Pre-built specs for common standards
+
+You don't always have to define reasons by hand. `MissingSpec` provides factory methods for common
+encodings, including CDISC/HL7 null flavors and SAS special missing values:
+
+```{python}
+cdisc = pb.MissingSpec.from_cdisc_null_flavors()
+print("NASK ->", cdisc.reason_for("NASK"))   # not_asked
+print("UNK  ->", cdisc.reason_for("UNK"))     # unknown
+```
+
+When metadata is imported from SPSS, Stata, or SAS files (see the *Metadata Import* section),
+[`MetadataImport.missing_specs()`](`pointblank.MetadataImport`) auto-generates a
+`{column: MissingSpec}` mapping from the variables' declared missing values, ready to pass straight
+to `missing_vals_tbl()`.
+
+::: {.callout-note}
+The same `MissingSpec` objects power missingness-aware *validation*, not just reporting. You can
+pass `missing=` to the `col_vals_*()` methods (to exclude sentinel values from a check) and use the
+dedicated [`col_pct_missing()`](`pointblank.Validate.col_pct_missing`),
+[`col_missing_coded()`](`pointblank.Validate.col_missing_coded`),
+[`col_missing_only_coded()`](`pointblank.Validate.col_missing_only_coded`), and
+[`col_missing_consistent()`](`pointblank.Validate.col_missing_consistent`) validation steps. See the
+*Validation Methods* article for details.
+:::

From b3d702b1e849aa96eb6ff6dc79e85724d3416b3d Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:47:11 -0400
Subject: [PATCH 46/55] Update 02-statistical-packages.qmd

---
 .../02-statistical-packages.qmd               | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/user_guide/11-metadata-import/02-statistical-packages.qmd b/user_guide/11-metadata-import/02-statistical-packages.qmd
index 25d85e1eb..9b40338b2 100644
--- a/user_guide/11-metadata-import/02-statistical-packages.qmd
+++ b/user_guide/11-metadata-import/02-statistical-packages.qmd
@@ -130,6 +130,36 @@ them appropriately. When validation is generated, these codes are documented in
 rather than generating explicit exclusion rules, since the correct handling depends on your
 analysis context.
 
+#### Turning missing codes into `MissingSpec` objects
+
+To put these codes to work in validation and reporting, convert them into
+[`MissingSpec`](`pointblank.MissingSpec`) objects. The
+[`MetadataImport.missing_specs()`](`pointblank.MetadataImport`) method does this for every variable
+that declares missing values, returning a `{column: MissingSpec}` mapping (the reason labels are
+derived from the variables' value labels):
+
+```python
+meta = pb.import_metadata("survey.sav")
+
+# Auto-generate a {column: MissingSpec} mapping from the declared missing values
+specs = meta.missing_specs()
+
+# Use the specs in a structured missingness report...
+pb.missing_vals_tbl(data, missing=specs)
+
+# ...or in missingness-aware validation
+validation = (
+    pb.Validate(data=data)
+    .col_vals_between(columns="age", left=0, right=120, missing=specs["age"])
+    .interrogate()
+)
+```
+
+You can also build a spec for a single variable with
+[`VariableMetadata.to_missing_spec()`](`pointblank.VariableMetadata`), or construct one directly
+from SPSS-style values via `pb.MissingSpec.from_spss(missing_values=[...], labels={...})`. See the
+*Missing Values Reporting* and *Validation Methods* articles for what you can do with these specs.
+
 ### Type Detection from Formats
 
 SPSS stores numeric variables with format strings that indicate how they should be displayed. These

From 7ea9b688b80987742b86c5c8b4d525990b383dbf Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 12:47:13 -0400
Subject: [PATCH 47/55] Update 03-cdisc-validation.qmd

---
 .../03-cdisc-validation.qmd                   | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/user_guide/11-metadata-import/03-cdisc-validation.qmd b/user_guide/11-metadata-import/03-cdisc-validation.qmd
index eb1a32e21..e8d8d4308 100644
--- a/user_guide/11-metadata-import/03-cdisc-validation.qmd
+++ b/user_guide/11-metadata-import/03-cdisc-validation.qmd
@@ -531,6 +531,27 @@ This layered approach gives you the flexibility to apply different levels of val
 on your needs. The Define-XML checks enforce what was specifically documented for your study,
 while the SDTM template checks enforce the broader standard requirements that apply universally.
 
+## Null Flavors and Structured Missingness
+
+Clinical data uses standardized HL7/CDISC *null flavors* to record *why* a value is absent (e.g.,
+`"NASK"` = not asked, `"UNK"` = unknown, `"NA"` = not applicable). Pointblank ships a pre-built
+[`MissingSpec`](`pointblank.MissingSpec`) for these codes via
+`MissingSpec.from_cdisc_null_flavors()`:
+
+```{python}
+cdisc = pb.MissingSpec.from_cdisc_null_flavors()
+
+print("NASK ->", cdisc.reason_for("NASK"))   # not_asked
+print("UNK  ->", cdisc.reason_for("UNK"))     # unknown
+print("boundary codes:", cdisc.values_for_category("boundary"))
+```
+
+This spec can be supplied to `missing_vals_tbl()` (as a `{column: spec}` mapping in `missing=`) for
+a reason-by-reason breakdown, or to the `col_vals_*()` and dedicated missingness validation methods
+(`col_pct_missing()`, `col_missing_coded()`, `col_missing_only_coded()`,
+`col_missing_consistent()`) to validate data while accounting for the null flavor codes. See the
+*Missing Values Reporting* and *Validation Methods* articles for the full set of capabilities.
+
 ## Conclusion
 
 CDISC data validation with Pointblank covers the full spectrum of clinical trial data management:

From 54861ae909ce2826e8c72f9e04df3e31bd0e9a0e Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:06:45 -0400
Subject: [PATCH 48/55] Improve appearance of missing vals table

---
 pointblank/validate.py                        | 166 +++++++++++-------
 tests/test_missing_vals_tbl_structured.py     |  97 +++++++++-
 .../03-missing-vals-tbl.qmd                   |  24 ++-
 3 files changed, 210 insertions(+), 77 deletions(-)

diff --git a/pointblank/validate.py b/pointblank/validate.py
index 4d3f9464e..084211b98 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -2680,12 +2680,6 @@ def _generate_display_table(
     return gt_tbl
 
 
-def _prettify_reason_label(reason: str) -> str:
-    """Turn a snake_case reason label into a Title Case display label (e.g. 'not_asked' ->
-    'Not Asked')."""
-    return reason.replace("_", " ").title()
-
-
 def _build_structured_missing_tbl(
     data: Any, missing: dict[str, MissingSpec], as_heatmap: bool = False
 ) -> GT:
@@ -2711,13 +2705,18 @@ def _build_structured_missing_tbl(
 
     available_columns = list(nw_frame.columns)
 
-    # Build the ordered union of reason labels across all specs (first-seen order)
+    # Build the ordered union of *declared* (coded) reason labels across all specs (first-seen
+    # order). Raw Null/None/NA values are tallied separately in a fixed "Null" column rather than
+    # being treated as a reason, since they are not part of any MissingSpec.
     reason_order: list[str] = []
     for spec in missing.values():
-        for r in spec.reasons_list():
+        for r in spec.reasons.values():
             if r not in reason_order:
                 reason_order.append(r)
 
+    # A "Null" column is shown only if at least one spec counts raw nulls as missing
+    has_null_col = any(spec.null_is_missing for spec in missing.values())
+
     records: list[dict[str, Any]] = []
     for column, spec in missing.items():
         if column not in available_columns:
@@ -2725,60 +2724,56 @@ def _build_structured_missing_tbl(
                 f"Column '{column}' given in `missing=` was not found in the table."
             )
 
-        # Build one aggregation per reason that has an expression (sentinels and/or nulls)
+        # One aggregation per declared reason (sentinel values only), plus a separate raw-null
+        # count when the spec treats nulls as missing; coded reasons and raw nulls are kept distinct
+        declared_reasons = list(dict.fromkeys(spec.reasons.values()))
         select_exprs: dict[str, Any] = {"__total__": nw.len()}
         reason_alias: dict[str, str] = {}
-        for i, r in enumerate(spec.reasons_list()):
-            sentinels = spec.values_for_reason(r)
-            expr = None
-            if sentinels:
-                expr = nw.col(column).is_in(sentinels)
-            if r == spec.null_reason and spec.null_is_missing:
-                null_expr = nw.col(column).is_null()
-                expr = null_expr if expr is None else (expr | null_expr)
-            if expr is not None:
-                alias = f"__r{i}__"
-                reason_alias[r] = alias
-                select_exprs[alias] = expr.cast(nw.Int32).sum()
+        for i, r in enumerate(declared_reasons):
+            reason_alias[r] = f"__r{i}__"
+            select_exprs[reason_alias[r]] = (
+                nw.col(column).is_in(spec.values_for_reason(r)).cast(nw.Int32).sum()
+            )
+        if spec.null_is_missing:
+            select_exprs["__null__"] = nw.col(column).is_null().cast(nw.Int32).sum()
 
         out = nw_frame.select(**select_exprs)
         if is_lazy:
             out = out.collect()
 
         total = int(out["__total__"][0])
-        counts: dict[str, int] = {}
-        for r in spec.reasons_list():
-            counts[r] = int(out[reason_alias[r]][0]) if r in reason_alias else 0
+        coded_counts = {r: int(out[reason_alias[r]][0]) for r in declared_reasons}
+        n_null = int(out["__null__"][0]) if spec.null_is_missing else 0
 
-        total_missing = sum(counts.values())
+        total_missing = sum(coded_counts.values()) + n_null
         complete = total - total_missing
 
+        # A coded reason only *applies* to a column if its spec declares it; non-applicable reasons
+        # render as an em dash (not "0"). The "Null" column applies only when null_is_missing=True.
+        applicable = set(declared_reasons)
+
         def _prop(count: int) -> float:
             return (count / total) if total > 0 else 0.0
 
         if as_heatmap:
-            # Numeric proportions (0..1) so cells can be color-shaded by missingness
-            record: dict[str, Any] = {
-                "columns": column,
-                "total_n": str(total),
-                "complete": _prop(complete),
-            }
+            # Numeric proportions (0..1) so reason cells can be color-shaded; non-applicable cells
+            # are left as None (shown as an em dash, uncolored)
+            record: dict[str, Any] = {"columns": column, "complete": _prop(complete)}
             for r in reason_order:
-                record[r] = _prop(counts.get(r, 0))
+                record[r] = _prop(coded_counts.get(r, 0)) if r in applicable else None
+            if has_null_col:
+                record["null"] = _prop(n_null) if spec.null_is_missing else None
         else:
 
             def _fmt(count: int) -> str:
                 pct = round(100 * count / total) if total > 0 else 0
                 return f"{count} ({pct}%)"
 
-            record = {
-                "columns": column,
-                "total_n": str(total),
-                "complete": _fmt(complete),
-            }
-            # Fill every reason column in the union (0 for reasons this spec doesn't define)
+            record = {"columns": column, "complete": _fmt(complete)}
             for r in reason_order:
-                record[r] = _fmt(counts.get(r, 0))
+                record[r] = _fmt(coded_counts.get(r, 0)) if r in applicable else "—"
+            if has_null_col:
+                record["null"] = _fmt(n_null) if spec.null_is_missing else "—"
         records.append(record)
 
     # Build a DataFrame from the records using the available DataFrame library
@@ -2792,26 +2787,52 @@ def _fmt(count: int) -> str:
 
         breakdown_df = pd.DataFrame(records)
 
-    cols_labels = {
-        "columns": "Column",
-        "total_n": "Total N",
-        "complete": "Complete",
-    }
-    for r in reason_order:
-        cols_labels[r] = _prettify_reason_label(r)
+    # Reason columns keep their raw input form as labels (e.g. "not_asked", not "Not Asked"); the
+    # fixed columns are relabeled. The total row count is already shown in the header, so there's no
+    # redundant "Total N" column. Raw nulls appear in a fixed "Null" column (styled like "Complete"),
+    # not as a reason.
+    cols_labels = {"columns": "Column", "complete": "Complete"}
+    if has_null_col:
+        cols_labels["null"] = "Null"
+
+    value_columns = ["complete"] + reason_order + (["null"] if has_null_col else [])
+
+    # Build a header that matches the default `missing_vals_tbl()` look: a plain (large) title in
+    # IBM Plex Sans and a subtitle showing the table type and dimensions
+    tbl_type = _get_tbl_type(data=data)
+    n_rows_total = get_row_count(data)
+    table_type_html = _create_table_type_html(tbl_type=tbl_type, tbl_name=None, font_size="10px")
+    tbl_dims_html = _create_table_dims_html(
+        columns=len(available_columns), rows=n_rows_total, font_size="10px"
+    )
+    combined_subtitle = (
+        "<div>"
+        '<div style="padding-top: 0; padding-bottom: 7px;">'
+        f"{table_type_html}"
+        f"{tbl_dims_html}"
+        "</div>"
+        "</div>"
+    )
+
+    # The left "Column" column is rendered in monospace, matching the default report's body font
+    column_name_style = style.text(
+        color="black", font=google_font(name="IBM Plex Mono"), size="12px"
+    )
+    # The reason column labels keep their raw input form and are shown in monospace
+    reason_label_style = style.text(font=google_font(name="IBM Plex Mono"), size="12px")
 
-    value_columns = ["total_n", "complete"] + reason_order
+    # Columns that should show an em dash for non-applicable cells (reason columns + the Null column)
+    em_dash_columns = reason_order + (["null"] if has_null_col else [])
 
     if as_heatmap:
         title = "Missing Pattern Heatmap"
-        subtitle = "Proportion of each missing reason per column (darker = more missing)."
-        prop_columns = ["complete"] + reason_order
+        # "complete" and "null" are shown as plain percentages (uncolored, like the default report);
+        # only the coded reason columns are color-shaded by proportion
+        prop_columns = ["complete"] + reason_order + (["null"] if has_null_col else [])
 
         gt_tbl = (
             GT(breakdown_df)
-            .tab_header(
-                title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle
-            )
+            .tab_header(title=title, subtitle=html(combined_subtitle))
             .opt_table_font(font=google_font(name="IBM Plex Sans"))
             .opt_align_table_header(align="left")
             .cols_label(cases=cols_labels)
@@ -2822,21 +2843,18 @@ def _fmt(count: int) -> str:
                 columns=reason_order,
                 palette=["#F5F5F5", "#000000"],
                 domain=[0, 1],
+                na_color="#FFFFFF",
             )
-            .tab_style(
-                style=style.text(weight="bold"),
-                locations=loc.body(columns="columns"),
-            )
+            .sub_missing(columns=em_dash_columns, missing_text="—")
+            .tab_style(style=column_name_style, locations=loc.body(columns="columns"))
+            .tab_style(style=reason_label_style, locations=loc.column_labels(columns=reason_order))
         )
     else:
         title = "Missing Values by Reason"
-        subtitle = "Counts and percentages of complete values and each missing reason, per column."
 
         gt_tbl = (
             GT(breakdown_df)
-            .tab_header(
-                title=html(f"<div style='font-size: 14px;'>{title}</div>"), subtitle=subtitle
-            )
+            .tab_header(title=title, subtitle=html(combined_subtitle))
             .opt_table_font(font=google_font(name="IBM Plex Sans"))
             .opt_align_table_header(align="left")
             .cols_label(cases=cols_labels)
@@ -2846,12 +2864,15 @@ def _fmt(count: int) -> str:
                 style=style.text(font=google_font(name="IBM Plex Mono"), size="12px"),
                 locations=loc.body(columns=value_columns),
             )
-            .tab_style(
-                style=style.text(weight="bold"),
-                locations=loc.body(columns="columns"),
-            )
+            .tab_style(style=column_name_style, locations=loc.body(columns="columns"))
+            .tab_style(style=reason_label_style, locations=loc.column_labels(columns=reason_order))
         )
 
+    # Group only the coded reasons under a "Missing Reasons" spanner. Raw nulls live in the fixed
+    # "Null" column (styled like "Complete"), so they aren't mistaken for declared spec reasons.
+    if reason_order:
+        gt_tbl = gt_tbl.tab_spanner(label="Missing Reasons", columns=reason_order)
+
     if version("great_tables") >= "0.17.0":
         gt_tbl = gt_tbl.tab_options(quarto_disable_processing=True)
 
@@ -2873,7 +2894,17 @@ def missing_vals_tbl(
     column. When a `missing=` mapping of columns to [`MissingSpec`](`pointblank.MissingSpec`) objects
     is supplied, the function instead renders a *structured missingness* breakdown: one row per
     column with the count and percentage of complete values and of each missing *reason* (e.g.,
-    "Refused", "Not Asked", "Unknown").
+    `refused`, `not_asked`). Declared (coded) reasons are grouped under a "Missing Reasons" spanner
+    and keep their raw input form as labels; actual `Null`/`None`/`NA` values (which are not part of
+    the spec) are tallied in a fixed "Null" column at the far right (styled like "Complete"), so
+    they aren't mistaken for declared reasons.
+
+    Note that supplying `missing=` produces a *different report* than the default view: it is a
+    distinct visualization (a per-reason breakdown table, or a per-reason heatmap with
+    `as_heatmap=True`), not an annotated version of the default sector heatmap. The report titles
+    differ accordingly ("Missing Values" for the default, "Missing Values by Reason" or "Missing
+    Pattern Heatmap" for the structured views), and the shared header/title styling makes the family
+    resemblance clear.
 
     Parameters
     ----------
@@ -2884,7 +2915,10 @@ def missing_vals_tbl(
     missing
         An optional dictionary mapping column names to [`MissingSpec`](`pointblank.MissingSpec`)
         objects. When provided, the function renders a structured breakdown of missingness by
-        reason for the specified columns (rather than the default sector heatmap).
+        reason for the specified columns (rather than the default sector heatmap). The reason
+        columns are the union of reasons across the supplied specs; a reason that isn't defined for
+        a given column is shown as an em dash (not applicable), as distinct from a
+        defined-but-unobserved reason (shown as `0 (0%)`).
     as_heatmap
         Only applies when `missing=` is provided. When `True`, render the per-reason proportions as
         a color-coded heatmap (cells shaded from light to dark by the proportion missing) instead of
diff --git a/tests/test_missing_vals_tbl_structured.py b/tests/test_missing_vals_tbl_structured.py
index fde123bbe..ae46963b0 100644
--- a/tests/test_missing_vals_tbl_structured.py
+++ b/tests/test_missing_vals_tbl_structured.py
@@ -31,16 +31,55 @@ def test_returns_gt(self, tbl_pl, specs):
 
     def test_reason_columns_present(self, tbl_pl, specs):
         html = pb.missing_vals_tbl(tbl_pl, missing=specs).as_raw_html()
+        # Coded reason labels keep their raw input form (snake_case), grouped under a spanner
         for token in [
-            "Not Asked",
-            "Refused",
-            "Dont Know",
-            "Below Threshold",
-            "Unknown",
+            "not_asked",
+            "refused",
+            "dont_know",
+            "below_threshold",
             "Complete",
-            "Total N",
+            "Null",  # fixed column for raw nulls (not a reason)
+            "Missing Reasons",  # spanner over the coded reason columns only
         ]:
             assert token in html
+        # Labels are not prettified to Title Case
+        assert "Not Asked" not in html and "Below Threshold" not in html
+        # The redundant "Total N" column was removed (row count is in the header)
+        assert "Total N" not in html
+
+    def test_null_is_fixed_column_not_a_reason(self, tbl_pl, specs):
+        # Raw nulls appear in a fixed "Null" column, not as an "unknown" reason under the spanner
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs).as_raw_html()
+        assert "Null" in html
+        assert "unknown" not in html  # the null_reason label is not shown
+        # "Null" is a fixed column to the right of the coded reasons
+        gt = pb.missing_vals_tbl(tbl_pl, missing=specs)
+        cols = list(gt._tbl_data.columns)
+        assert cols[-1] == "null"
+        assert cols.index("null") > cols.index("below_threshold")
+
+    def test_no_null_column_when_null_not_missing(self):
+        # null_is_missing=False -> no "Null" column and no "unknown" text
+        tbl = pl.DataFrame({"age": [34, -98, 41, None]})
+        spec = {"age": pb.MissingSpec(reasons={-98: "refused"}, null_is_missing=False)}
+        gt = pb.missing_vals_tbl(tbl, missing=spec)
+        assert "null" not in list(gt._tbl_data.columns)
+        html = gt.as_raw_html()
+        assert "unknown" not in html
+
+    def test_null_column_em_dash_when_not_applicable(self):
+        # When one spec counts nulls and another doesn't, the Null column shows an em dash for the
+        # column whose spec sets null_is_missing=False
+        tbl = pl.DataFrame({"a": [1, -99, None], "b": [1, -99, None]})
+        specs = {
+            "a": pb.MissingSpec(reasons={-99: "not_asked"}),  # null_is_missing=True
+            "b": pb.MissingSpec(reasons={-99: "not_asked"}, null_is_missing=False),
+        }
+        gt = pb.missing_vals_tbl(tbl, missing=specs)
+        null_vals = list(gt._tbl_data["null"])
+        # column "a" counts its 1 null; column "b" is not applicable (em dash)
+        assert null_vals[0] == "1 (33%)"
+        assert null_vals[1] == "—"
 
     def test_counts_correct(self, tbl_pl):
         # age: total 8 -> refused 2 (25%), not_asked 1 (12%), dont_know 0 (0%),
@@ -57,7 +96,7 @@ def test_null_excluded_when_spec_says_so(self):
         tbl = pl.DataFrame({"age": [34, -98, 41, None]})
         spec = pb.MissingSpec(reasons={-98: "refused"}, null_is_missing=False)
         html = pb.missing_vals_tbl(tbl, missing={"age": spec}).as_raw_html()
-        assert "Unknown" not in html
+        assert "unknown" not in html
         # complete = 3 (null + 2 reals) of 4 = 75%
         assert "3 (75%)" in html
 
@@ -94,7 +133,8 @@ def test_heatmap_returns_gt(self, tbl_pl, specs):
     def test_heatmap_title_and_labels(self, tbl_pl, specs):
         html = pb.missing_vals_tbl(tbl_pl, missing=specs, as_heatmap=True).as_raw_html()
         assert "Missing Pattern Heatmap" in html
-        assert "Refused" in html and "Below Threshold" in html
+        assert "refused" in html and "below_threshold" in html
+        assert "Missing Reasons" in html  # spanner over reason columns
         assert "%" in html  # proportions formatted as percentages
 
     def test_heatmap_pandas(self, specs):
@@ -109,3 +149,44 @@ def test_heatmap_pandas(self, specs):
     def test_as_heatmap_ignored_without_missing(self, tbl_pl):
         # as_heatmap only applies with missing=; default sector view still returned
         assert isinstance(pb.missing_vals_tbl(tbl_pl, as_heatmap=True), GT)
+
+
+class TestStyledLikeOriginal:
+    """The structured/heatmap outputs should reuse the original report's title style and the
+    monospaced left Column column."""
+
+    def test_table_mode_styling(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs).as_raw_html()
+        # Monospaced font present (left Column column + value columns)
+        assert "IBM Plex Mono" in html
+        # Header carries the table type + dimensions subtitle (as the default report does)
+        assert "rows" in html.lower() or "columns" in html.lower()
+        # Plain title (no shrunk font-size wrapper as before)
+        assert "<div style='font-size: 14px;'>Missing Values by Reason" not in html
+
+    def test_heatmap_mode_styling(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs, as_heatmap=True).as_raw_html()
+        assert "IBM Plex Mono" in html
+        assert "<div style='font-size: 14px;'>Missing Pattern Heatmap" not in html
+
+
+class TestNonApplicableReasons:
+    """Reasons not defined in a column's spec should render as an em dash, not '0 (0%)'."""
+
+    def test_table_mode_em_dash(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs).as_raw_html()
+        # age has no "below_threshold"; income has no "refused"/"dont_know" -> 3 em dashes
+        assert html.count("—") == 3
+        # age DOES define "dont_know" but observes none -> should still show "0 (0%)"
+        assert "0 (0%)" in html
+
+    def test_heatmap_mode_em_dash(self, tbl_pl, specs):
+        html = pb.missing_vals_tbl(tbl_pl, missing=specs, as_heatmap=True).as_raw_html()
+        assert html.count("—") == 3
+
+    def test_single_spec_no_em_dash(self):
+        # With one spec, every reason in the union applies -> no em dashes
+        tbl = pl.DataFrame({"age": [34, -98, 41, -99]})
+        spec = {"age": pb.MissingSpec(reasons={-99: "not_asked", -98: "refused"})}
+        html = pb.missing_vals_tbl(tbl, missing=spec).as_raw_html()
+        assert "—" not in html
diff --git a/user_guide/05-data-inspection/03-missing-vals-tbl.qmd b/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
index a4fafbf76..f47c264ff 100644
--- a/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
+++ b/user_guide/05-data-inspection/03-missing-vals-tbl.qmd
@@ -94,6 +94,16 @@ When you pass a `missing=` mapping of column names to `MissingSpec` objects, `mi
 switches from the sector heatmap to a *structured breakdown*: one row per column with the count and
 percentage of complete values and of each missing reason.
 
+::: {.callout-note}
+## Supplying `missing=` produces a different report
+
+The structured breakdown is a *distinct visualization*, not an annotated version of the default
+sector heatmap. Adding `missing=` changes the table's whole layout. The report title changes too
+(from "Missing Values" to "Missing Values by Reason", or "Missing Pattern Heatmap" with
+`as_heatmap=True`), and the shared title styling and monospaced column list keep the two views
+recognizably part of the same family.
+:::
+
 ```{python}
 import polars as pl
 
@@ -112,9 +122,17 @@ specs = {
 pb.missing_vals_tbl(survey, missing=specs)
 ```
 
-Each `MissingSpec` declares the sentinel values for a column and the reason each one represents. By
-default, actual `Null` values are also counted as missing (with the reason `"unknown"`); set
-`null_is_missing=False` on the spec if raw nulls should be treated as real values instead.
+Each `MissingSpec` declares the sentinel values for a column and the reason each one represents.
+Those declared (coded) reasons are grouped under the **Missing Reasons** spanner. By default, actual
+`Null` values are also counted as missing; because those are raw `Null`/`None`/`NA` values and *not*
+part of the spec, they're tallied in a fixed **Null** column at the far right (styled like
+**Complete**), rather than as a reason. Set `null_is_missing=False` on a spec to treat raw nulls as
+real values; that row shows `—` under **Null**, and the column is omitted if no spec counts nulls.
+
+The reason columns are the *union* of reasons across all the specs you provide. When a reason isn't
+defined for a particular column, that cell shows an em dash (`—`) rather than `0 (0%)`, signaling
+"not applicable to this column" as distinct from a reason that *is* defined but simply wasn't
+observed (which shows `0 (0%)`).
 
 ### Viewing the pattern as a heatmap
 

From e5c12777d56f6477cba060dc21bfd3c61c56f2e8 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:37:45 -0400
Subject: [PATCH 49/55] Perform some code formatting

---
 pointblank/missing.py  | 11 +++++++----
 pointblank/validate.py | 35 +++++++++++++++--------------------
 pointblank/yaml.py     |  1 +
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/pointblank/missing.py b/pointblank/missing.py
index de533d560..4d2814aac 100644
--- a/pointblank/missing.py
+++ b/pointblank/missing.py
@@ -140,9 +140,7 @@ def _validate(self) -> None:
                 )
 
         if not isinstance(self.null_reason, str):
-            raise TypeError(
-                f"null_reason must be a string, got {type(self.null_reason).__name__}."
-            )
+            raise TypeError(f"null_reason must be a string, got {type(self.null_reason).__name__}.")
 
         if self.categories is not None:
             if not isinstance(self.categories, dict):
@@ -309,7 +307,12 @@ def from_cdisc_null_flavors(
         """
         reasons = dict(_CDISC_NULL_FLAVORS)
         categories = {
-            "unknown": ["no_information", "unknown", "asked_but_unknown", "temporarily_unavailable"],
+            "unknown": [
+                "no_information",
+                "unknown",
+                "asked_but_unknown",
+                "temporarily_unavailable",
+            ],
             "not_applicable": ["not_applicable", "not_asked", "not_present"],
             "boundary": ["positive_infinity", "negative_infinity"],
         }
diff --git a/pointblank/validate.py b/pointblank/validate.py
index 084211b98..2787bed5d 100644
--- a/pointblank/validate.py
+++ b/pointblank/validate.py
@@ -2720,9 +2720,7 @@ def _build_structured_missing_tbl(
     records: list[dict[str, Any]] = []
     for column, spec in missing.items():
         if column not in available_columns:
-            raise ValueError(
-                f"Column '{column}' given in `missing=` was not found in the table."
-            )
+            raise ValueError(f"Column '{column}' given in `missing=` was not found in the table.")
 
         # One aggregation per declared reason (sentinel values only), plus a separate raw-null
         # count when the spec treats nulls as missing; coded reasons and raw nulls are kept distinct
@@ -10814,9 +10812,7 @@ def col_pct_missing(
         _check_active_input(param=active, param_name="active")
 
         if not isinstance(missing, MissingSpec):
-            raise TypeError(
-                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
-            )
+            raise TypeError(f"`missing=` must be a MissingSpec, got {type(missing).__name__}.")
 
         if reason is not None and category is not None:
             raise ValueError("Only one of `reason=` or `category=` can be specified.")
@@ -11026,9 +11022,7 @@ def col_missing_coded(
         _check_active_input(param=active, param_name="active")
 
         if not isinstance(missing, MissingSpec):
-            raise TypeError(
-                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
-            )
+            raise TypeError(f"`missing=` must be a MissingSpec, got {type(missing).__name__}.")
 
         # Determine threshold to use (global or local) and normalize a local `thresholds=` value
         thresholds = (
@@ -11214,9 +11208,7 @@ def col_missing_only_coded(
         _check_active_input(param=active, param_name="active")
 
         if not isinstance(missing, MissingSpec):
-            raise TypeError(
-                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
-            )
+            raise TypeError(f"`missing=` must be a MissingSpec, got {type(missing).__name__}.")
 
         if allowed is None and min_val is None and max_val is None:
             raise ValueError(
@@ -11916,17 +11908,13 @@ def col_missing_consistent(
         _check_active_input(param=active, param_name="active")
 
         if not isinstance(missing, MissingSpec):
-            raise TypeError(
-                f"`missing=` must be a MissingSpec, got {type(missing).__name__}."
-            )
+            raise TypeError(f"`missing=` must be a MissingSpec, got {type(missing).__name__}.")
 
         if isinstance(columns, str):
             columns = [columns]
         columns = list(columns)
         if len(columns) < 2:
-            raise ValueError(
-                "`col_missing_consistent()` requires at least two columns to compare."
-            )
+            raise ValueError("`col_missing_consistent()` requires at least two columns to compare.")
 
         # Resolve which sentinel values (and whether nulls) represent `when_reason`
         sentinels = missing.values_for_reason(when_reason)
@@ -18981,7 +18969,10 @@ def get_step_report(
         # if get_row_count(extract) == 0:
         #    return "No rows were extracted."
 
-        if assertion_type in ROW_BASED_VALIDATION_TYPES + ["rows_complete", "col_missing_consistent"]:
+        if assertion_type in ROW_BASED_VALIDATION_TYPES + [
+            "rows_complete",
+            "col_missing_consistent",
+        ]:
             # Get the extracted data for the step
             extract = self.get_data_extracts(i=i, frame=True)
 
@@ -19069,7 +19060,11 @@ def get_step_report(
         if step_spec is None and isinstance(values, MissingSpec):
             # col_missing_coded stores the spec directly in `values`
             step_spec = values
-        if step_spec is None and isinstance(values, dict) and isinstance(values.get("spec"), MissingSpec):
+        if (
+            step_spec is None
+            and isinstance(values, dict)
+            and isinstance(values.get("spec"), MissingSpec)
+        ):
             # col_missing_only_coded and col_missing_consistent stash the spec under `values["spec"]`
             step_spec = values["spec"]
         if step_spec is not None and step_report is not None:
diff --git a/pointblank/yaml.py b/pointblank/yaml.py
index 6a9e39c26..22ef133a8 100644
--- a/pointblank/yaml.py
+++ b/pointblank/yaml.py
@@ -41,6 +41,7 @@ def _missing_spec_to_code(spec: MissingSpec) -> str:
         parts.append(f"description={spec.description!r}")
     return f"pb.MissingSpec({', '.join(parts)})"
 
+
 if TYPE_CHECKING:
     from typing import Literal
 

From 09f75520851a314719ec45901e204a91ba553c8d Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:37:49 -0400
Subject: [PATCH 50/55] Update test_col_missing_consistent.py

---
 tests/test_col_missing_consistent.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tests/test_col_missing_consistent.py b/tests/test_col_missing_consistent.py
index 50c424ba4..1bcedce26 100644
--- a/tests/test_col_missing_consistent.py
+++ b/tests/test_col_missing_consistent.py
@@ -31,9 +31,7 @@ def test_basic_inconsistency(self, spec):
         assert info.n_failed == 1  # last row: only one column is -99
 
     def test_all_consistent_passes(self, spec):
-        tbl = pl.DataFrame(
-            {"a": [1, -99, 2, -99], "b": [5, -99, 6, -99]}
-        )
+        tbl = pl.DataFrame({"a": [1, -99, 2, -99], "b": [5, -99, 6, -99]})
         v = (
             pb.Validate(data=tbl)
             .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")
@@ -54,9 +52,7 @@ def test_null_reason_consistency(self):
         assert _info(v).n_failed == 1
 
     def test_three_columns(self, spec):
-        tbl = pl.DataFrame(
-            {"a": [-99, 1, -99], "b": [-99, 2, -99], "c": [-99, 3, 7]}
-        )
+        tbl = pl.DataFrame({"a": [-99, 1, -99], "b": [-99, 2, -99], "c": [-99, 3, 7]})
         v = (
             pb.Validate(data=tbl)
             .col_missing_consistent(columns=["a", "b", "c"], missing=spec, when_reason="not_asked")
@@ -80,9 +76,7 @@ def test_missing_must_be_spec(self):
             )
 
     def test_pandas_backend(self, spec):
-        tbl = pd.DataFrame(
-            {"a": [1, -99, -99], "b": [5, -99, 6]}
-        )
+        tbl = pd.DataFrame({"a": [1, -99, -99], "b": [5, -99, 6]})
         v = (
             pb.Validate(data=tbl)
             .col_missing_consistent(columns=["a", "b"], missing=spec, when_reason="not_asked")

From 222bb6254bc0fedc02429b63a5b05e0f7e76b84e Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:37:51 -0400
Subject: [PATCH 51/55] Update test_col_missing_only_coded.py

---
 tests/test_col_missing_only_coded.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/test_col_missing_only_coded.py b/tests/test_col_missing_only_coded.py
index 7bb1bbbbb..55bf83b80 100644
--- a/tests/test_col_missing_only_coded.py
+++ b/tests/test_col_missing_only_coded.py
@@ -94,9 +94,7 @@ def test_report_and_step_report(self, spec):
         tbl = pl.DataFrame({"age": [34, -98, -95, 41]})
         v = (
             pb.Validate(data=tbl)
-            .col_missing_only_coded(
-                columns="age", missing=spec, min_val=0, max_val=120, brief=True
-            )
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120, brief=True)
             .interrogate()
         )
         assert v.get_tabular_report() is not None
@@ -107,9 +105,7 @@ def test_brief_langs(self, spec, lang):
         tbl = pl.DataFrame({"age": [34, -95]})
         v = (
             pb.Validate(data=tbl, lang=lang)
-            .col_missing_only_coded(
-                columns="age", missing=spec, min_val=0, max_val=120, brief=True
-            )
+            .col_missing_only_coded(columns="age", missing=spec, min_val=0, max_val=120, brief=True)
             .interrogate()
         )
         assert _info(v).autobrief

From 476934c0a4a17fa958d5ac1f26e46c437cc42194 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:37:54 -0400
Subject: [PATCH 52/55] Update test_col_pct_missing.py

---
 tests/test_col_pct_missing.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_col_pct_missing.py b/tests/test_col_pct_missing.py
index a1c7d4b78..4dbce285f 100644
--- a/tests/test_col_pct_missing.py
+++ b/tests/test_col_pct_missing.py
@@ -123,9 +123,7 @@ def test_missing_must_be_missingspec(self, survey_tbl):
             )
 
     def test_multiple_columns(self, age_missing):
-        tbl = pl.DataFrame(
-            {"a": [1, -98, 3, 4], "b": [-99, -99, 3, 4]}
-        )
+        tbl = pl.DataFrame({"a": [1, -98, 3, 4], "b": [-99, -99, 3, 4]})
         validation = (
             pb.Validate(data=tbl)
             .col_pct_missing(columns=["a", "b"], missing=age_missing, max_pct=0.5)

From 8bc28846fed8987626fa95d2bac3179ce0356272 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:37:58 -0400
Subject: [PATCH 53/55] Update test_missing_report_integration.py

---
 tests/test_missing_report_integration.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/test_missing_report_integration.py b/tests/test_missing_report_integration.py
index edd08cda2..6751ec13e 100644
--- a/tests/test_missing_report_integration.py
+++ b/tests/test_missing_report_integration.py
@@ -20,11 +20,7 @@ def test_tabular_report_annotates_missing_aware_steps():
 
 def test_tabular_report_no_annotation_without_missing():
     tbl = pl.DataFrame({"age": [34, -98, 41, 200]})
-    v = (
-        pb.Validate(data=tbl)
-        .col_vals_between(columns="age", left=0, right=120)
-        .interrogate()
-    )
+    v = pb.Validate(data=tbl).col_vals_between(columns="age", left=0, right=120).interrogate()
     html = v.get_tabular_report().as_raw_html()
     assert "MISSING-AWARE" not in html
     assert "Missing codes" not in html

From 096027aac271c3b53a30d613c9d050b8ed9f64c1 Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 15:46:36 -0400
Subject: [PATCH 54/55] Regenerate validate.pyi for the _ValidationInfo.missing
 field

---
 pointblank/validate.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pointblank/validate.pyi b/pointblank/validate.pyi
index 25e1714b9..72df3911c 100644
--- a/pointblank/validate.pyi
+++ b/pointblank/validate.pyi
@@ -106,6 +106,7 @@ class _ValidationInfo:
     values: Any | list[Any] | tuple | None = ...
     inclusive: tuple[bool, bool] | None = ...
     na_pass: bool | None = ...
+    missing: Any | None = ...
     pre: Callable | None = ...
     segments: Any | None = ...
     thresholds: Thresholds | None = ...

From 5dfd4646019bc8964232069d553e06223864e54b Mon Sep 17 00:00:00 2001
From: Richard Iannone <riannone@me.com>
Date: Wed, 17 Jun 2026 16:11:15 -0400
Subject: [PATCH 55/55] Cap pandas <3 in test/dev deps to match the pinned
 duckdb

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2b4c897ac..314790628 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,7 +78,7 @@ docs = [
     "jupyter",
     "nbclient>=0.10.0",
     "nbformat>=5.10.4",
-    "pandas>=2.2.3",
+    "pandas>=2.2.3,<3",  # <3: pandas 3.0's default `str` dtype is unsupported by the pinned duckdb (<1.3.3)
     "polars>=1.17.1",
     "pyspark==3.5.6",
     "openpyxl>=3.0.0",
@@ -94,7 +94,7 @@ dev = [
     "jupyter",
     "nbclient>=0.10.0",
     "nbformat>=5.10.4",
-    "pandas>=2.2.3",
+    "pandas>=2.2.3,<3",  # <3: pandas 3.0's default `str` dtype is unsupported by the pinned duckdb (<1.3.3)
     "polars>=1.17.1",
     "pre-commit==2.15.0",
     "pyarrow",