Source code for causalpy.checks.outcome_falsification

#   Copyright 2022 - 2026 The PyMC Labs Developers
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
"""
Outcome falsification sensitivity check.

Re-fits the same causal method with alternative outcome formulas and
reports their estimated effect sizes.  The researcher interprets
whether the pattern of effects across outcomes is consistent with
their causal story.

Inspired by the "causal detective" approach in Gallea (2026),
*The Causal Mindset Handbook*, where Hanlon's London fog study
tests the pollution hypothesis by comparing effect sizes across
outcomes (pneumonia deaths vs. accident deaths vs. crime deaths).

Supports formula-based experiments: InterruptedTimeSeries,
DifferenceInDifferences, PiecewiseITS.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any

import pandas as pd

from causalpy.checks.base import CheckResult, clone_model
from causalpy.experiments.base import BaseExperiment
from causalpy.experiments.diff_in_diff import DifferenceInDifferences
from causalpy.experiments.interrupted_time_series import InterruptedTimeSeries
from causalpy.experiments.piecewise_its import PiecewiseITS
from causalpy.pipeline import PipelineContext
from causalpy.pymc_models import PyMCModel

logger = logging.getLogger(__name__)



[docs]
@dataclass
class FalsificationResult:
    """Result for a single falsification formula.

    Attributes
    ----------
    formula : str
        The falsification formula used.
    experiment : BaseExperiment
        The fitted experiment for this formula.
    effect_mean : float
        Posterior mean of the estimated effect.
    hdi_lower : float
        Lower bound of the HDI for the effect.
    hdi_upper : float
        Upper bound of the HDI for the effect.
    """

    formula: str
    experiment: BaseExperiment
    effect_mean: float
    hdi_lower: float
    hdi_upper: float




[docs]
class OutcomeFalsification:
    """Outcome falsification sensitivity check.

    Re-fits the experiment with alternative formulas and reports the
    estimated effect size for each.  This is an informational check --
    it does not make pass/fail judgments.  The researcher compares
    effect sizes across outcomes to assess whether the pattern is
    consistent with their causal story.

    Parameters
    ----------
    formulas : list[str]
        Falsification formulas.  Each must be a complete patsy formula
        (e.g., ``"beer ~ 1 + year"``).  The outcome variable must
        exist in the data.
    alpha : float, default 0.05
        Significance level.  The HDI probability is ``1 - alpha``.

    Examples
    --------
    >>> import causalpy as cp  # doctest: +SKIP
    >>> check = cp.checks.OutcomeFalsification(  # doctest: +SKIP
    ...     formulas=["beer ~ 1 + year", "lnincome ~ 1 + year"],
    ... )
    """

    applicable_methods: set[type[BaseExperiment]] = {
        InterruptedTimeSeries,
        DifferenceInDifferences,
        PiecewiseITS,
    }


[docs]
    def __init__(
        self,
        formulas: list[str],
        alpha: float = 0.05,
    ) -> None:
        if not formulas:
            raise ValueError("formulas must be a non-empty list of formula strings.")
        if not all(isinstance(f, str) for f in formulas):
            raise TypeError("Each formula must be a string.")
        if not 0 < alpha < 1:
            raise ValueError(f"alpha must be in (0, 1), got {alpha}")
        self.formulas = list(formulas)
        self.alpha = alpha


    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------


[docs]
    def validate(self, experiment: BaseExperiment) -> None:
        """Verify the experiment is compatible with OutcomeFalsification.

        Raises
        ------
        TypeError
            If the experiment does not use a formula, or uses a
            non-PyMC model.
        """
        if not hasattr(experiment, "formula"):
            raise TypeError(
                f"{type(experiment).__name__} does not use a formula. "
                f"OutcomeFalsification requires formula-based experiments "
                f"(InterruptedTimeSeries, DifferenceInDifferences, PiecewiseITS)."
            )
        if not isinstance(experiment.model, PyMCModel):
            raise TypeError(
                f"OutcomeFalsification requires a PyMC model for posterior "
                f"extraction, but got {type(experiment.model).__name__}. "
                f"Use a PyMC model (e.g. cp.pymc_models.LinearRegression)."
            )


    # ------------------------------------------------------------------
    # Factory helper
    # ------------------------------------------------------------------

    @staticmethod
    def _build_experiment(
        context: PipelineContext,
        formula: str,
    ) -> BaseExperiment:
        """Create and fit an experiment with a different formula.

        Uses ``context.experiment_config`` to reconstruct the experiment
        with all original parameters except the formula.
        """
        if context.experiment_config is None:
            raise RuntimeError(
                "No experiment_config in context. Use EstimateEffect "
                "before SensitivityAnalysis, or pass an explicit "
                "experiment_factory."
            )

        config = context.experiment_config
        method = config["method"]
        kwargs = {k: v for k, v in config.items() if k != "method"}

        kwargs["formula"] = formula

        if "model" in kwargs and kwargs["model"] is not None:
            kwargs["model"] = clone_model(kwargs["model"])

        return method(context.data, **kwargs)

    # ------------------------------------------------------------------
    # Effect extraction
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_effect_stats(
        experiment: BaseExperiment,
        alpha: float,
    ) -> dict[str, float]:
        """Extract mean and HDI from a fitted experiment.

        Uses the experiment's ``effect_summary()`` method which provides
        standardized output across experiment types.  The returned table
        always contains ``hdi_lower`` and ``hdi_upper`` columns.
        """
        summary = experiment.effect_summary(
            alpha=alpha, direction="two-sided", cumulative=False, relative=False
        )
        table = summary.table
        row = table.iloc[0]

        return {
            "mean": float(row["mean"]),
            "hdi_lower": float(row["hdi_lower"]),
            "hdi_upper": float(row["hdi_upper"]),
        }

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------


[docs]
    def run(
        self,
        experiment: BaseExperiment,
        context: PipelineContext,
    ) -> CheckResult:
        """Run outcome falsification analysis.

        For each falsification formula, fits the experiment with
        the alternative formula and reports the estimated effect size
        with HDI intervals.

        This is an informational check (``passed=None``).  The
        researcher interprets whether the pattern of effect sizes
        across outcomes supports their causal story.

        Returns
        -------
        CheckResult
            With ``passed=None`` (informational).  The table contains
            effect sizes and HDI intervals for each falsification
            formula.
        """
        self.validate(experiment)

        results: list[FalsificationResult] = []
        rows: list[dict[str, Any]] = []
        failed_formulas: list[str] = []

        hdi_pct = int(round((1 - self.alpha) * 100))

        for formula in self.formulas:
            logger.info(
                "OutcomeFalsification: fitting formula '%s'",
                formula,
            )
            try:
                alt_experiment = self._build_experiment(context, formula)
                stats = self._extract_effect_stats(alt_experiment, self.alpha)

                fr = FalsificationResult(
                    formula=formula,
                    experiment=alt_experiment,
                    effect_mean=stats["mean"],
                    hdi_lower=stats["hdi_lower"],
                    hdi_upper=stats["hdi_upper"],
                )
                results.append(fr)
                rows.append(
                    {
                        "formula": formula,
                        "effect_mean": stats["mean"],
                        f"hdi_{hdi_pct}%_lower": stats["hdi_lower"],
                        f"hdi_{hdi_pct}%_upper": stats["hdi_upper"],
                    }
                )

            except Exception:
                logger.warning(
                    "OutcomeFalsification: failed for formula '%s'",
                    formula,
                    exc_info=True,
                )
                failed_formulas.append(formula)
                rows.append(
                    {
                        "formula": formula,
                        "effect_mean": float("nan"),
                        f"hdi_{hdi_pct}%_lower": float("nan"),
                        f"hdi_{hdi_pct}%_upper": float("nan"),
                    }
                )

        table = pd.DataFrame(rows) if rows else None

        parts = [
            f"Outcome falsification: {len(self.formulas)} formula(s), {hdi_pct}% HDI.",
        ]
        if failed_formulas:
            parts.append(
                f"  {len(failed_formulas)} formula(s) failed to fit: {failed_formulas}"
            )
        for fr in results:
            parts.append(
                f"  {fr.formula}: "
                f"effect = {fr.effect_mean:.4f} "
                f"[{fr.hdi_lower:.4f}, {fr.hdi_upper:.4f}]"
            )
        text = "\n".join(parts)

        return CheckResult(
            check_name="OutcomeFalsification",
            passed=None,
            table=table,
            text=text,
            metadata={
                "falsification_results": results,
                "alpha": self.alpha,
            },
        )


    def __repr__(self) -> str:
        return f"OutcomeFalsification(formulas={self.formulas!r}, alpha={self.alpha})"