sktime · WHOIM1205 · Mar 31, 2026
@@ -0,0 +1,71 @@
+# Bayesian Modelling API — Proof of Concept
+
+This PoC demonstrates the core architecture for a Bayesian modelling API layer
+in skpro, as proposed for GSoC 2026.
+
+## What this PoC contains
+
+| File | Purpose |
+|------|---------|
+| `_base.py` | `BaseBayesianRegressor` — base class (subclass of `BaseProbaRegressor`) that adds prior/posterior/update interfaces |
+| `_prior.py` | `Prior` — wraps skpro distributions as parameter priors (backend-agnostic) |
+| `_ridge.py` | `BayesianRidgeRegressor` — working estimator with evidence maximization, posterior access, and sequential updating |
+| `tests/test_bayesian_ridge.py` | Tests covering fit, predict, posterior API, sequential updates, and Prior class |
+
+## Core idea
+
+The central mechanism is **`BaseBayesianRegressor`**, a base class that extends
+skpro's existing `BaseProbaRegressor` with a standardized Bayesian interface:
+
+- `get_prior()` / `set_prior()` — inspect and configure prior distributions
+- `get_posterior()` / `get_posterior_summary()` — access fitted posterior
+- `sample_posterior(n)` — draw from the parameter posterior
+- `update(X, y)` — sequential Bayesian update (posterior becomes new prior)
+
+Priors are specified using skpro's own 68+ distribution classes (via the `Prior`
+wrapper), so the same `Normal`, `HalfCauchy`, `InverseGamma`, etc. work as both
+priors and predictive distributions — no separate prior DSL needed.
+
+The `BayesianRidgeRegressor` demonstrates this in action: it performs Type-II
+maximum likelihood (evidence maximization) to optimize hyperparameters, computes
+the closed-form posterior, and returns standard `BaseDistribution` objects from
+`predict_proba` — fully compatible with skpro's metrics, pipelines, and model
+selection tools.
+
+## How to run locally
+
+```bash
+# From the repository root
+pip install -e .
+
+# Run the tests
+pytest skpro/regression/bayesian/tests/test_bayesian_ridge.py -v -o "addopts="
+
+# Quick smoke test
+python3 -c "
+from skpro.regression.bayesian import BayesianRidgeRegressor
+from sklearn.datasets import load_diabetes
+from sklearn.model_selection import train_test_split
+
+X, y = load_diabetes(return_X_y=True, as_frame=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+reg = BayesianRidgeRegressor()
+reg.fit(X_train, y_train)
+
+# Full predictive distribution
+dist = reg.predict_proba(X_test)
+print('Predictive mean shape:', dist.mean().shape)
+print('Predictive std sample:', dist.var().values[:3, 0] ** 0.5)
+
+# Bayesian-specific API
+print(reg.get_posterior_summary())
+"
+```
+
+## What this does NOT include
+
+- No UI, no notebooks, no full pipeline
+- No MCMC/VI inference (the full project adds these)
+- No survival models (planned for Phase 3)
+- No new distribution classes (skpro's existing 68+ are sufficient)
@@ -1,10 +1,18 @@
-"""Base classes for Bayesian probabilistic regression."""
+"""Bayesian probabilistic regression estimators."""
 # copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
 
 __all__ = [
+    "BaseBayesianRegressor",
     "BayesianConjugateLinearRegressor",
     "BayesianLinearRegressor",
+    "BayesianRidgeRegressor",
+    "Prior",
 ]
 
-from skpro.regression.bayesian._linear_conjugate import BayesianConjugateLinearRegressor
+from skpro.regression.bayesian._base import BaseBayesianRegressor
+from skpro.regression.bayesian._linear_conjugate import (
+    BayesianConjugateLinearRegressor,
+)
 from skpro.regression.bayesian._linear_mcmc import BayesianLinearRegressor
+from skpro.regression.bayesian._prior import Prior
+from skpro.regression.bayesian._ridge import BayesianRidgeRegressor
@@ -0,0 +1,176 @@
+"""Base class for Bayesian probabilistic regressors."""
+# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
+
+__author__ = ["david_laid"]
+
+import pandas as pd
+
+from skpro.regression.base import BaseProbaRegressor
+
+
+class BaseBayesianRegressor(BaseProbaRegressor):
+    """Base mixin for Bayesian probabilistic regressors.
+
+    Extends ``BaseProbaRegressor`` with standardized interfaces for:
+
+    * Prior specification and access (``get_prior``, ``set_prior``)
+    * Posterior access (``get_posterior``, ``get_posterior_summary``)
+    * Posterior sampling (``sample_posterior``)
+    * Sequential Bayesian updating (inherits ``update`` from base class)
+
+    Subclasses must implement
+    -------------------------
+    _fit(X, y, C=None)
+        Perform posterior inference given training data.
+    _predict_proba(X)
+        Return the posterior predictive distribution.
+    _get_prior_params()
+        Return a ``dict[str, BaseDistribution]`` of prior distributions.
+    _get_posterior_params()
+        Return a ``dict[str, BaseDistribution]`` of posterior distributions.
+
+    Optionally override
+    -------------------
+    _sample_posterior(n_samples)
+        Draw from the parameter posterior (default uses distribution sampling).
+    _update(X, y, C=None)
+        Efficient sequential Bayesian update.
+    """
+
+    _tags = {
+        "capability:update": True,
+    }
+
+    # --- Prior interface ---------------------------------------------------
+
+    def get_prior(self):
+        """Return prior distributions over model parameters.
+
+        Returns
+        -------
+        prior : dict of str -> BaseDistribution
+            Mapping from parameter name to its prior distribution.
+        """
+        return self._get_prior_params()
+
+    def _get_prior_params(self):
+        """Return prior distributions. Override in subclasses."""
+        raise NotImplementedError(
+            f"{type(self).__name__} does not implement _get_prior_params"
+        )
+
+    def set_prior(self, **priors):
+        """Set prior distributions for model parameters.
+
+        Parameters
+        ----------
+        **priors : BaseDistribution or Prior
+            Prior distributions keyed by parameter name.
+
+        Returns
+        -------
+        self
+        """
+        if not hasattr(self, "_custom_priors"):
+            self._custom_priors = {}
+        self._custom_priors.update(priors)
+        return self
+
+    # --- Posterior interface ------------------------------------------------
+
+    def get_posterior(self):
+        """Return posterior distributions over model parameters.
+
+        Must be called after ``fit``.
+
+        Returns
+        -------
+        posterior : dict of str -> BaseDistribution
+            Mapping from parameter name to its posterior distribution.
+        """
+        self.check_is_fitted()
+        return self._get_posterior_params()
+
+    def _get_posterior_params(self):
+        """Return posterior distributions. Override in subclasses."""
+        raise NotImplementedError(
+            f"{type(self).__name__} does not implement _get_posterior_params"
+        )
+
+    def get_posterior_summary(self):
+        """Return summary statistics of the posterior.
+
+        Returns
+        -------
+        summary : pd.DataFrame
+            DataFrame with mean, std, and 95% credible interval bounds
+            for each model parameter.
+        """
+        self.check_is_fitted()
+        return self._get_posterior_summary()
+
+    def _get_posterior_summary(self):
+        """Compute posterior summary. Override for custom behaviour."""
+        posterior = self._get_posterior_params()
+        rows = []
+        for name, dist in posterior.items():
+            mean_val = dist.mean()
+            var_val = dist.var()
+            q_lo = dist.ppf(0.025)
+            q_hi = dist.ppf(0.975)
+
+            # Flatten to scalar when possible
+            def _scalar(v):
+                try:
+                    return float(v.values.ravel()[0])
+                except (AttributeError, IndexError):
+                    return float(v)
+
+            rows.append(
+                {
+                    "parameter": name,
+                    "mean": _scalar(mean_val),
+                    "std": _scalar(var_val) ** 0.5,
+                    "q_0.025": _scalar(q_lo),
+                    "q_0.975": _scalar(q_hi),
+                }
+            )
+        return pd.DataFrame(rows).set_index("parameter")
+
+    # --- Posterior sampling -------------------------------------------------
+
+    def sample_posterior(self, n_samples=100):
+        """Sample from the posterior distribution over parameters.
+
+        Parameters
+        ----------
+        n_samples : int, default=100
+            Number of posterior samples to draw.
+
+        Returns
+        -------
+        samples : dict of str -> np.ndarray
+            Parameter samples keyed by parameter name.
+        """
+        self.check_is_fitted()
+        return self._sample_posterior(n_samples=n_samples)
+
+    def _sample_posterior(self, n_samples=100):
+        """Sample from posterior. Default uses distribution ``.sample()``."""
+        posterior = self._get_posterior_params()
+        return {
+            name: dist.sample(n_samples) for name, dist in posterior.items()
+        }
+
+    # --- Sequential updating -----------------------------------------------
+
+    def _update(self, X, y, C=None):
+        """Bayesian sequential update — posterior becomes new prior.
+
+        Default raises ``NotImplementedError``; subclasses with efficient
+        conjugate or incremental updates should override.
+        """
+        raise NotImplementedError(
+            f"{type(self).__name__} does not implement _update. "
+            "Re-fit with all data or use an estimator that supports updates."
+        )
@@ -0,0 +1,86 @@
+"""Prior specification for Bayesian model parameters.
+
+Wraps skpro distribution objects with parameter metadata,
+providing a unified prior specification language across all
+Bayesian estimators.
+"""
+# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
+
+__author__ = ["david_laid"]
+
+
+class Prior:
+    """Prior specification for a single model parameter.
+
+    Wraps a skpro ``BaseDistribution`` instance with a parameter name,
+    allowing Bayesian estimators to accept priors in a backend-agnostic way.
+
+    Parameters
+    ----------
+    distribution : BaseDistribution
+        A skpro distribution instance (e.g., ``Normal(mu=0, sigma=10)``).
+    name : str, optional
+        Name of the model parameter this prior applies to.
+
+    Examples
+    --------
+    >>> from skpro.distributions import Normal
+    >>> from skpro.regression.bayesian._prior import Prior
+    >>> prior = Prior(Normal(mu=0, sigma=10), name="coefficients")
+    >>> prior.sample(3).shape  # doctest: +SKIP
+    (3, 1, 1)
+    """
+
+    def __init__(self, distribution, name=None):
+        from skpro.distributions.base import BaseDistribution
+
+        if not isinstance(distribution, BaseDistribution):
+            raise TypeError(
+                f"`distribution` must be a skpro BaseDistribution, "
+                f"got {type(distribution).__name__}"
+            )
+        self.distribution = distribution
+        self.name = name
+
+    def sample(self, n_samples=1):
+        """Draw samples from the prior distribution.
+
+        Parameters
+        ----------
+        n_samples : int, default=1
+            Number of samples to draw.
+
+        Returns
+        -------
+        samples : pd.DataFrame
+            Samples from the prior distribution.
+        """
+        return self.distribution.sample(n_samples)
+
+    def log_pdf(self, x):
+        """Evaluate the log-density of the prior at ``x``.
+
+        Parameters
+        ----------
+        x : array-like
+            Points at which to evaluate the log-density.
+
+        Returns
+        -------
+        log_density : pd.DataFrame
+            Log-density values.
+        """
+        return self.distribution.log_pdf(x)
+
+    def mean(self):
+        """Return the prior mean."""
+        return self.distribution.mean()
+
+    def var(self):
+        """Return the prior variance."""
+        return self.distribution.var()
+
+    def __repr__(self):
+        dist_name = type(self.distribution).__name__
+        name_str = f", name='{self.name}'" if self.name is not None else ""
+        return f"Prior({dist_name}{name_str})"