From c9479eaa78aed7a5e4a9d337a0d4eac1974dbad1 Mon Sep 17 00:00:00 2001 From: WHOIM1205 Date: Tue, 31 Mar 2026 05:38:18 -0700 Subject: [PATCH] [ENH] Add Bayesian modelling API PoC: base class, prior spec, ridge regressor Signed-off-by: WHOIM1205 --- skpro/regression/bayesian/README.md | 71 ++++ skpro/regression/bayesian/__init__.py | 12 +- skpro/regression/bayesian/_base.py | 176 ++++++++++ skpro/regression/bayesian/_prior.py | 86 +++++ skpro/regression/bayesian/_ridge.py | 329 ++++++++++++++++++ skpro/regression/bayesian/tests/__init__.py | 0 .../bayesian/tests/test_bayesian_ridge.py | 138 ++++++++ 7 files changed, 810 insertions(+), 2 deletions(-) create mode 100644 skpro/regression/bayesian/README.md create mode 100644 skpro/regression/bayesian/_base.py create mode 100644 skpro/regression/bayesian/_prior.py create mode 100644 skpro/regression/bayesian/_ridge.py create mode 100644 skpro/regression/bayesian/tests/__init__.py create mode 100644 skpro/regression/bayesian/tests/test_bayesian_ridge.py diff --git a/skpro/regression/bayesian/README.md b/skpro/regression/bayesian/README.md new file mode 100644 index 000000000..c36fb5f48 --- /dev/null +++ b/skpro/regression/bayesian/README.md @@ -0,0 +1,71 @@ +# Bayesian Modelling API — Proof of Concept + +This PoC demonstrates the core architecture for a Bayesian modelling API layer +in skpro, as proposed for GSoC 2026. 
+ +## What this PoC contains + +| File | Purpose | +|------|---------| +| `_base.py` | `BaseBayesianRegressor` — base class that extends `BaseProbaRegressor` with prior/posterior/update interfaces | +| `_prior.py` | `Prior` — wraps skpro distributions as parameter priors (backend-agnostic) | +| `_ridge.py` | `BayesianRidgeRegressor` — working estimator with evidence maximization, posterior access, and sequential updating | +| `tests/test_bayesian_ridge.py` | Tests covering fit, predict, posterior API, sequential updates, and Prior class | + +## Core idea + +The central mechanism is a **`BaseBayesianRegressor`** base class that extends skpro's +existing `BaseProbaRegressor` with a standardized Bayesian interface: + +- `get_prior()` / `set_prior()` — inspect and configure prior distributions +- `get_posterior()` / `get_posterior_summary()` — access fitted posterior +- `sample_posterior(n)` — draw from the parameter posterior +- `update(X, y)` — sequential Bayesian update (posterior becomes new prior) + +Priors are specified using skpro's own 68+ distribution classes (via the `Prior` +wrapper), so the same `Normal`, `HalfCauchy`, `InverseGamma`, etc. work as both +priors and predictive distributions — no separate prior DSL needed. + +The `BayesianRidgeRegressor` demonstrates this in action: it performs Type-II +maximum likelihood (evidence maximization) to optimize hyperparameters, computes +the closed-form posterior, and returns standard `BaseDistribution` objects from +`predict_proba` — fully compatible with skpro's metrics, pipelines, and model +selection tools. + +## How to run locally + +```bash +# From the repository root +pip install -e . 
+ +# Run the tests +pytest skpro/regression/bayesian/tests/test_bayesian_ridge.py -v -o "addopts=" + +# Quick smoke test +python3 -c " +from skpro.regression.bayesian import BayesianRidgeRegressor +from sklearn.datasets import load_diabetes +from sklearn.model_selection import train_test_split + +X, y = load_diabetes(return_X_y=True, as_frame=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +reg = BayesianRidgeRegressor() +reg.fit(X_train, y_train) + +# Full predictive distribution +dist = reg.predict_proba(X_test) +print('Predictive mean shape:', dist.mean().shape) +print('Predictive std sample:', dist.var().values[:3, 0] ** 0.5) + +# Bayesian-specific API +print(reg.get_posterior_summary()) +" +``` + +## What this does NOT include + +- No UI, no notebooks, no full pipeline +- No MCMC/VI inference (the full project adds these) +- No survival models (planned for Phase 3) +- No new distribution classes (skpro's existing 68+ are sufficient) diff --git a/skpro/regression/bayesian/__init__.py b/skpro/regression/bayesian/__init__.py index 84b7d3e62..78e844a32 100644 --- a/skpro/regression/bayesian/__init__.py +++ b/skpro/regression/bayesian/__init__.py @@ -1,10 +1,18 @@ -"""Base classes for Bayesian probabilistic regression.""" +"""Bayesian probabilistic regression estimators.""" # copyright: skpro developers, BSD-3-Clause License (see LICENSE file) __all__ = [ + "BaseBayesianRegressor", "BayesianConjugateLinearRegressor", "BayesianLinearRegressor", + "BayesianRidgeRegressor", + "Prior", ] -from skpro.regression.bayesian._linear_conjugate import BayesianConjugateLinearRegressor +from skpro.regression.bayesian._base import BaseBayesianRegressor +from skpro.regression.bayesian._linear_conjugate import ( + BayesianConjugateLinearRegressor, +) from skpro.regression.bayesian._linear_mcmc import BayesianLinearRegressor +from skpro.regression.bayesian._prior import Prior +from skpro.regression.bayesian._ridge import BayesianRidgeRegressor diff 
# skpro/regression/bayesian/_base.py
"""Base class for Bayesian probabilistic regressors."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["david_laid"]

import numpy as np
import pandas as pd

from skpro.regression.base import BaseProbaRegressor


class BaseBayesianRegressor(BaseProbaRegressor):
    """Base class for Bayesian probabilistic regressors.

    Subclasses ``BaseProbaRegressor`` and adds standardized interfaces for:

    * Prior specification and access (``get_prior``, ``set_prior``)
    * Posterior access (``get_posterior``, ``get_posterior_summary``)
    * Posterior sampling (``sample_posterior``)
    * Sequential Bayesian updating (inherits ``update`` from base class)

    Subclasses must implement
    -------------------------
    _fit(X, y, C=None)
        Perform posterior inference given training data.
    _predict_proba(X)
        Return the posterior predictive distribution.
    _get_prior_params()
        Return a ``dict[str, BaseDistribution]`` of prior distributions.
    _get_posterior_params()
        Return a ``dict[str, BaseDistribution]`` of posterior distributions.

    Optionally override
    -------------------
    _sample_posterior(n_samples)
        Draw from the parameter posterior (default uses distribution sampling).
    _update(X, y, C=None)
        Efficient sequential Bayesian update.

    Notes
    -----
    Priors passed to ``set_prior`` are stored on the instance in
    ``_custom_priors`` and reported by ``get_prior``. This class does not use
    them for inference itself; whether ``_fit`` honours them is up to the
    concrete subclass.
    """

    # The default ``_update`` below raises ``NotImplementedError``, so a
    # subclass that keeps this tag set to True is expected to override it.
    _tags = {
        "capability:update": True,
    }

    # --- Prior interface ---------------------------------------------------

    def get_prior(self):
        """Return prior distributions over model parameters.

        The estimator's default priors (``_get_prior_params``) are overlaid
        with any priors previously registered through ``set_prior``, so that
        a ``set_prior`` / ``get_prior`` round trip reflects the user's choice.

        Returns
        -------
        prior : dict of str -> BaseDistribution
            Mapping from parameter name to its prior distribution.
        """
        priors = dict(self._get_prior_params())
        priors.update(getattr(self, "_custom_priors", {}))
        return priors

    def _get_prior_params(self):
        """Return prior distributions. Override in subclasses."""
        raise NotImplementedError(
            f"{type(self).__name__} does not implement _get_prior_params"
        )

    def set_prior(self, **priors):
        """Set prior distributions for model parameters.

        Parameters
        ----------
        **priors : BaseDistribution or Prior
            Prior distributions keyed by parameter name. ``Prior`` wrappers
            are unwrapped to the distribution they carry, so ``get_prior``
            always returns plain distributions.

        Returns
        -------
        self
        """
        # Imported here rather than at module level so that importing this
        # module does not pull in the prior module.
        from skpro.regression.bayesian._prior import Prior

        if not hasattr(self, "_custom_priors"):
            self._custom_priors = {}
        for name, prior in priors.items():
            if isinstance(prior, Prior):
                prior = prior.distribution
            self._custom_priors[name] = prior
        return self

    # --- Posterior interface ------------------------------------------------

    def get_posterior(self):
        """Return posterior distributions over model parameters.

        Must be called after ``fit``.

        Returns
        -------
        posterior : dict of str -> BaseDistribution
            Mapping from parameter name to its posterior distribution.
        """
        self.check_is_fitted()
        return self._get_posterior_params()

    def _get_posterior_params(self):
        """Return posterior distributions. Override in subclasses."""
        raise NotImplementedError(
            f"{type(self).__name__} does not implement _get_posterior_params"
        )

    def get_posterior_summary(self):
        """Return summary statistics of the posterior.

        Must be called after ``fit``.

        Returns
        -------
        summary : pd.DataFrame
            DataFrame indexed by ``parameter`` with columns ``mean``, ``std``,
            ``q_0.025`` and ``q_0.975`` (95% credible interval bounds).
            A scalar parameter contributes one row labelled with its name; a
            parameter with several components contributes one row per
            component, labelled ``name[0]``, ``name[1]``, ...
        """
        self.check_is_fitted()
        return self._get_posterior_summary()

    def _get_posterior_summary(self):
        """Compute posterior summary. Override for custom behaviour."""

        def _flat(value):
            # Accepts scalars, arrays and pandas objects alike and always
            # yields a 1-D float array.
            return np.asarray(value, dtype=float).ravel()

        columns = ["parameter", "mean", "std", "q_0.025", "q_0.975"]
        rows = []
        for name, dist in self._get_posterior_params().items():
            # Broadcasting keeps the four statistics aligned even if one of
            # them comes back as a scalar.
            mean, var, q_lo, q_hi = np.broadcast_arrays(
                _flat(dist.mean()),
                _flat(dist.var()),
                _flat(dist.ppf(0.025)),
                _flat(dist.ppf(0.975)),
            )
            if mean.size == 1:
                labels = [name]
            else:
                labels = [f"{name}[{i}]" for i in range(mean.size)]
            for label, m, v, lo, hi in zip(labels, mean, var, q_lo, q_hi):
                rows.append(
                    {
                        "parameter": label,
                        "mean": float(m),
                        "std": float(v) ** 0.5,
                        "q_0.025": float(lo),
                        "q_0.975": float(hi),
                    }
                )
        # Explicit columns keep ``set_index`` working for an empty posterior.
        return pd.DataFrame(rows, columns=columns).set_index("parameter")

    # --- Posterior sampling -------------------------------------------------

    def sample_posterior(self, n_samples=100):
        """Sample from the posterior distribution over parameters.

        Must be called after ``fit``.

        Parameters
        ----------
        n_samples : int, default=100
            Number of posterior samples to draw.

        Returns
        -------
        samples : dict
            Parameter samples keyed by parameter name. With the default
            implementation each value is whatever the posterior
            distribution's ``sample(n_samples)`` returns.
        """
        self.check_is_fitted()
        return self._sample_posterior(n_samples=n_samples)

    def _sample_posterior(self, n_samples=100):
        """Sample from posterior. Default uses distribution ``.sample()``."""
        posterior = self._get_posterior_params()
        return {name: dist.sample(n_samples) for name, dist in posterior.items()}

    # --- Sequential updating -----------------------------------------------

    def _update(self, X, y, C=None):
        """Bayesian sequential update — posterior becomes new prior.

        Default raises ``NotImplementedError``; subclasses with efficient
        conjugate or incremental updates should override.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement _update. "
            "Re-fit with all data or use an estimator that supports updates."
        )
# skpro/regression/bayesian/_prior.py
"""Prior specification for Bayesian model parameters.

Wraps skpro distribution objects with parameter metadata,
providing a unified prior specification language across all
Bayesian estimators.
"""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["david_laid"]


class Prior:
    """Prior specification for a single model parameter.

    Wraps a skpro ``BaseDistribution`` instance with a parameter name,
    allowing Bayesian estimators to accept priors in a backend-agnostic way.
    ``sample``, ``log_pdf``, ``mean`` and ``var`` simply delegate to the
    wrapped distribution.

    Parameters
    ----------
    distribution : BaseDistribution
        A skpro distribution instance (e.g., ``Normal(mu=0, sigma=10)``).
    name : str, optional
        Name of the model parameter this prior applies to.

    Raises
    ------
    TypeError
        If ``distribution`` is not a skpro ``BaseDistribution``.

    Examples
    --------
    >>> from skpro.distributions import Normal
    >>> from skpro.regression.bayesian._prior import Prior
    >>> prior = Prior(Normal(mu=0, sigma=10), name="coefficients")
    >>> prior
    Prior(Normal, name='coefficients')
    """

    def __init__(self, distribution, name=None):
        # Imported lazily so that importing this module stays lightweight.
        from skpro.distributions.base import BaseDistribution

        if not isinstance(distribution, BaseDistribution):
            raise TypeError(
                f"`distribution` must be a skpro BaseDistribution, "
                f"got {type(distribution).__name__}"
            )
        self.distribution = distribution
        self.name = name

    def sample(self, n_samples=1):
        """Draw samples from the prior distribution.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to draw.

        Returns
        -------
        samples : pd.DataFrame
            Samples from the prior distribution.
        """
        return self.distribution.sample(n_samples)

    def log_pdf(self, x):
        """Evaluate the log-density of the prior at ``x``.

        Parameters
        ----------
        x : array-like
            Points at which to evaluate the log-density.

        Returns
        -------
        log_density : pd.DataFrame
            Log-density values.
        """
        return self.distribution.log_pdf(x)

    def mean(self):
        """Return the prior mean."""
        return self.distribution.mean()

    def var(self):
        """Return the prior variance."""
        return self.distribution.var()

    def __repr__(self):
        dist_name = type(self.distribution).__name__
        if self.name is None:
            return f"Prior({dist_name})"
        # ``!r`` quotes and escapes the name correctly even when it contains
        # quote characters itself.
        return f"Prior({dist_name}, name={self.name!r})"
# skpro/regression/bayesian/_ridge.py
"""Bayesian Ridge Regression estimator with evidence maximization."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["david_laid"]

import numpy as np
import pandas as pd

from skpro.distributions.normal import Normal
from skpro.regression.bayesian._base import BaseBayesianRegressor


class BayesianRidgeRegressor(BaseBayesianRegressor):
    r"""Bayesian Ridge Regression with automatic evidence maximization.

    Fits a linear model ``y = X w + ε`` with Gaussian prior on weights
    and Gaussian noise, using Type-II maximum likelihood (empirical Bayes)
    to optimize the precision hyperparameters.

    Model
    -----
    .. math::

        w \sim \mathcal{N}(0, \alpha^{-1} I)

        y | X, w \sim \mathcal{N}(X w, \beta^{-1} I)

    The hyperparameters ``α`` (weight precision) and ``β`` (noise precision)
    are optimized by maximizing the log marginal likelihood (evidence).
    The posterior over ``w`` is then available in closed form:

    .. math::

        w | X, y \sim \mathcal{N}(m_N, S_N)

        S_N = (\alpha I + \beta X^T X)^{-1}

        m_N = \beta S_N X^T y

    Parameters
    ----------
    alpha_init : float, default=1e-6
        Initial value for weight precision ``α``.
    beta_init : float, default=1e-6
        Initial value for noise precision ``β``.
    n_iter : int, default=300
        Maximum number of evidence maximization iterations. With ``0`` the
        posterior is computed directly from the initial precisions.
    tol : float, default=1e-3
        Convergence tolerance on the log marginal likelihood.
    fit_intercept : bool, default=True
        Whether to fit an intercept term.

    Attributes
    ----------
    alpha_ : float
        Optimized weight precision.
    beta_ : float
        Optimized noise precision.
    coef_mean_ : np.ndarray of shape (n_features,)
        Posterior mean of the weight vector, computed with ``alpha_`` and
        ``beta_``.
    coef_cov_ : np.ndarray of shape (n_features, n_features)
        Posterior covariance of the weight vector, computed with ``alpha_``
        and ``beta_``.
    intercept_ : float
        Fitted intercept (0.0 if ``fit_intercept=False``).

    Examples
    --------
    >>> from skpro.regression.bayesian._ridge import BayesianRidgeRegressor
    >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.model_selection import train_test_split
    >>> X, y = load_diabetes(return_X_y=True, as_frame=True)  # doctest: +SKIP
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, random_state=42
    ... )  # doctest: +SKIP
    >>> reg = BayesianRidgeRegressor()  # doctest: +SKIP
    >>> reg.fit(X_train, y_train)  # doctest: +SKIP
    >>> dist = reg.predict_proba(X_test)  # doctest: +SKIP
    >>> dist.mean()  # doctest: +SKIP
    """

    _tags = {
        "authors": ["david_laid"],
        "maintainers": ["david_laid"],
        "capability:multioutput": False,
        # The fit works on ``X.T @ X`` / ``X.T @ y`` directly, so a single NaN
        # would contaminate every coefficient; missing values are not handled.
        "capability:missing": False,
        "capability:update": True,
        "X_inner_mtype": "pd_DataFrame_Table",
        "y_inner_mtype": "pd_DataFrame_Table",
    }

    def __init__(
        self,
        alpha_init=1e-6,
        beta_init=1e-6,
        n_iter=300,
        tol=1e-3,
        fit_intercept=True,
    ):
        self.alpha_init = alpha_init
        self.beta_init = beta_init
        self.n_iter = n_iter
        self.tol = tol
        self.fit_intercept = fit_intercept

        super().__init__()

    # ------------------------------------------------------------------
    # Core fit / predict
    # ------------------------------------------------------------------

    @staticmethod
    def _weight_posterior(alpha, beta, XtX, Xty):
        """Return ``(m_N, S_N, S_N_inv)`` for fixed precisions ``alpha``, ``beta``.

        ``S_N_inv = alpha * I + beta * XtX``, ``S_N`` is its inverse and
        ``m_N = beta * S_N @ Xty``.
        """
        S_N_inv = alpha * np.eye(XtX.shape[0]) + beta * XtX
        S_N = np.linalg.inv(S_N_inv)
        m_N = beta * S_N @ Xty
        return m_N, S_N, S_N_inv

    def _fit(self, X, y, C=None):
        """Fit Bayesian Ridge via evidence maximization.

        Each iteration computes the weight posterior for the current
        ``(alpha, beta)``, evaluates the log marginal likelihood of exactly
        that state for the convergence check, and only then re-estimates the
        precisions. The stored posterior is therefore always the one implied
        by the stored ``alpha_`` / ``beta_``.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)
        y : pd.DataFrame of shape (n_samples, 1)
        C : ignored
            Accepted for signature consistency with ``_update``; not used.

        Returns
        -------
        self
        """
        self._y_cols = y.columns

        X_np = X.values.astype(np.float64)
        y_np = y.values[:, 0].astype(np.float64)

        # Centre data if fitting intercept; zero offsets otherwise so that the
        # rest of the code can subtract them unconditionally.
        if self.fit_intercept:
            self._X_mean = X_np.mean(axis=0)
            self._y_mean = y_np.mean()
        else:
            self._X_mean = np.zeros(X_np.shape[1])
            self._y_mean = 0.0
        X_np = X_np - self._X_mean
        y_np = y_np - self._y_mean

        n_samples, n_features = X_np.shape

        # Sufficient statistics of the (centred) data
        XtX = X_np.T @ X_np
        Xty = X_np.T @ y_np

        # X^T X is positive semi-definite; clip round-off negatives to zero.
        eigenvalues = np.clip(np.linalg.eigvalsh(XtX), 0.0, None)

        # ``eps`` floors the denominators (zero weight norm for a constant
        # target, zero residuals for a perfect fit); ``tiny`` keeps the
        # precisions strictly positive so their logarithm stays finite.
        eps = np.finfo(np.float64).eps
        tiny = np.finfo(np.float64).tiny

        alpha = float(self.alpha_init)
        beta = float(self.beta_init)
        log_ml_prev = -np.inf

        for _ in range(self.n_iter):
            m_N, S_N, S_N_inv = self._weight_posterior(alpha, beta, XtX, Xty)

            residuals = y_np - X_np @ m_N
            rss = float(residuals @ residuals)
            wss = float(m_N @ m_N)

            # Log marginal likelihood of the current (alpha, beta, posterior)
            log_ml = 0.5 * (
                n_features * np.log(alpha)
                + n_samples * np.log(beta)
                - beta * rss
                - alpha * wss
                - np.linalg.slogdet(S_N_inv)[1]
                - n_samples * np.log(2 * np.pi)
            )
            if np.abs(log_ml - log_ml_prev) < self.tol:
                break
            log_ml_prev = log_ml

            # Effective number of well-determined parameters
            gamma = float(
                np.sum(beta * eigenvalues / (alpha + beta * eigenvalues))
            )

            # Re-estimate the precisions from the current posterior
            alpha = max(gamma / max(wss, eps), tiny)
            beta = max((n_samples - gamma) / max(rss, eps), tiny)
        else:
            # Loop ran out of iterations (or n_iter == 0): the precisions were
            # updated after the last posterior solve, so solve once more.
            m_N, S_N, _ = self._weight_posterior(alpha, beta, XtX, Xty)

        # Store fitted parameters
        self.alpha_ = alpha
        self.beta_ = beta
        self.coef_mean_ = m_N
        self.coef_cov_ = S_N
        self.intercept_ = self._y_mean - self._X_mean @ m_N

        # Store for potential sequential updates
        self._XtX = XtX
        self._Xty = Xty
        self._n_samples = n_samples
        self._eigenvalues = eigenvalues

        return self

    def _predict_proba(self, X):
        """Return posterior predictive distribution.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)

        Returns
        -------
        dist : Normal
            Posterior predictive Normal distribution, indexed like ``X`` and
            with the column labels of the training ``y``.
        """
        idx = X.index
        X_np = X.values.astype(np.float64)

        # Predictive mean
        pred_mean = X_np @ self.coef_mean_ + self.intercept_

        # Predictive variance = noise variance + model uncertainty
        # var(y*) = 1/beta + x^T S_N x, with x centred like the training data
        X_centred = X_np - self._X_mean
        model_var = np.sum((X_centred @ self.coef_cov_) * X_centred, axis=1)
        pred_var = 1.0 / self.beta_ + model_var
        # Floor guards the square root against round-off negatives.
        pred_std = np.sqrt(np.maximum(pred_var, 1e-12))

        return Normal(
            mu=pred_mean.reshape(-1, 1).tolist(),
            sigma=pred_std.reshape(-1, 1).tolist(),
            columns=self._y_cols,
            index=idx,
        )

    # ------------------------------------------------------------------
    # Bayesian API (BaseBayesianRegressor interface)
    # ------------------------------------------------------------------

    def _get_prior_params(self):
        """Return prior distributions over model parameters.

        Uses the fitted precisions when available and the ``*_init`` values
        otherwise.

        Returns
        -------
        dict
            ``"coefficients"`` is the zero-mean Normal weight prior with
            standard deviation ``alpha ** -0.5``. ``"noise_std"`` is a
            zero-mean Normal with standard deviation ``beta ** -0.5``, i.e.
            the noise distribution implied by the model equations.
        """
        alpha = getattr(self, "alpha_", self.alpha_init)
        beta = getattr(self, "beta_", self.beta_init)

        return {
            "coefficients": Normal(
                mu=0.0,
                sigma=(1.0 / alpha) ** 0.5,
            ),
            "noise_std": Normal(
                mu=0.0,
                sigma=(1.0 / beta) ** 0.5,
            ),
        }

    def _get_posterior_params(self):
        """Return posterior distributions over model parameters.

        Only the marginal standard deviations (square roots of the diagonal
        of ``coef_cov_``) are exposed; posterior correlations between weights
        are not represented in the returned ``Normal``.
        """
        post_std = np.sqrt(np.diag(self.coef_cov_))

        return {
            "coefficients": Normal(
                mu=self.coef_mean_.tolist(),
                sigma=post_std.tolist(),
            ),
        }

    def _update(self, X, y, C=None):
        """Sequential Bayesian update with new data.

        Adds the new observations to the stored sufficient statistics and
        re-derives the weight posterior with the precisions ``alpha_`` and
        ``beta_`` found during ``fit``; the precisions themselves and the
        centring offsets are not re-estimated.

        Parameters
        ----------
        X : pd.DataFrame
        y : pd.DataFrame
        C : ignored

        Returns
        -------
        self
        """
        # Offsets are zero when fit_intercept=False, so this is a no-op then.
        X_np = X.values.astype(np.float64) - self._X_mean
        y_np = y.values[:, 0].astype(np.float64) - self._y_mean

        # Accumulate sufficient statistics
        self._XtX = self._XtX + X_np.T @ X_np
        self._Xty = self._Xty + X_np.T @ y_np
        self._n_samples += X_np.shape[0]
        self._eigenvalues = np.clip(np.linalg.eigvalsh(self._XtX), 0.0, None)

        # Re-derive posterior with current alpha/beta
        m_N, S_N, _ = self._weight_posterior(
            self.alpha_, self.beta_, self._XtX, self._Xty
        )

        self.coef_mean_ = m_N
        self.coef_cov_ = S_N
        self.intercept_ = self._y_mean - self._X_mean @ m_N

        return self

    # ------------------------------------------------------------------
    # Test params for skpro test suite
    # ------------------------------------------------------------------

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Not inspected; the same two settings are returned for any value.

        Returns
        -------
        params : list of dict
        """
        params1 = {
            "alpha_init": 1e-6,
            "beta_init": 1e-6,
            "n_iter": 100,
        }
        params2 = {
            "alpha_init": 1.0,
            "beta_init": 1.0,
            "n_iter": 50,
            "fit_intercept": False,
        }
        return [params1, params2]
# skpro/regression/bayesian/tests/test_bayesian_ridge.py
"""Tests for BayesianRidgeRegressor and BaseBayesianRegressor API."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

import numpy as np
import pandas as pd
import pytest

from skpro.distributions.normal import Normal
from skpro.regression.bayesian._base import BaseBayesianRegressor
from skpro.regression.bayesian._prior import Prior
from skpro.regression.bayesian._ridge import BayesianRidgeRegressor


@pytest.fixture
def regression_data():
    """Build a small noisy linear dataset whose true weights are known."""
    rng = np.random.default_rng(42)
    n_rows, n_cols = 100, 3
    features = rng.standard_normal((n_rows, n_cols))
    true_coefs = np.array([1.5, -2.0, 0.5])
    noise = 0.1 * rng.standard_normal(n_rows)
    target = features @ true_coefs + noise

    X_df = pd.DataFrame(features, columns=["x1", "x2", "x3"])
    y_df = pd.DataFrame(target, columns=["target"])
    return X_df, y_df, true_coefs


class TestBayesianRidgeRegressor:
    """Behavioural checks for ``BayesianRidgeRegressor``."""

    def test_fit_predict(self, regression_data):
        """Fitting works and ``predict_proba`` yields a ``Normal``."""
        X, y, _ = regression_data
        reg = BayesianRidgeRegressor(n_iter=100)
        reg.fit(X, y)

        assert isinstance(reg.predict_proba(X), Normal)

        # Point predictions come back as a frame shaped like the target
        y_pred = reg.predict(X)
        assert isinstance(y_pred, pd.DataFrame)
        assert y_pred.shape == y.shape

    def test_posterior_recovers_true_coefs(self, regression_data):
        """The posterior mean lands near the generating weights."""
        X, y, true_coefs = regression_data
        reg = BayesianRidgeRegressor(n_iter=300)
        reg.fit(X, y)

        np.testing.assert_allclose(reg.coef_mean_, true_coefs, atol=0.2)

    def test_get_posterior(self, regression_data):
        """``get_posterior`` gives a dict holding a ``Normal`` for the weights."""
        X, y, _ = regression_data
        reg = BayesianRidgeRegressor()
        reg.fit(X, y)

        posterior = reg.get_posterior()
        assert isinstance(posterior, dict)
        assert "coefficients" in posterior
        assert isinstance(posterior["coefficients"], Normal)

    def test_get_posterior_summary(self, regression_data):
        """The posterior summary is a DataFrame with the expected columns."""
        X, y, _ = regression_data
        reg = BayesianRidgeRegressor()
        reg.fit(X, y)

        summary = reg.get_posterior_summary()
        assert isinstance(summary, pd.DataFrame)
        for column in ("mean", "std", "q_0.025", "q_0.975"):
            assert column in summary.columns

    def test_sample_posterior(self, regression_data):
        """``sample_posterior`` returns samples keyed by parameter name."""
        X, y, _ = regression_data
        reg = BayesianRidgeRegressor()
        reg.fit(X, y)

        samples = reg.sample_posterior(n_samples=50)
        assert isinstance(samples, dict)
        assert "coefficients" in samples

    def test_sequential_update(self, regression_data):
        """Calling ``update`` with a second batch moves the posterior mean."""
        X, y, _ = regression_data
        first_X, second_X = X.iloc[:50], X.iloc[50:]
        first_y, second_y = y.iloc[:50], y.iloc[50:]

        reg = BayesianRidgeRegressor(n_iter=100)
        reg.fit(first_X, first_y)
        coef_before = reg.coef_mean_.copy()

        reg.update(second_X, second_y)
        coef_after = reg.coef_mean_.copy()

        # More data must shift the posterior
        assert not np.allclose(coef_before, coef_after)

    def test_get_test_params(self):
        """Every parameter set from ``get_test_params`` builds an estimator."""
        params_list = BayesianRidgeRegressor.get_test_params()
        assert isinstance(params_list, list)
        assert len(params_list) >= 1

        for params in params_list:
            assert isinstance(
                BayesianRidgeRegressor(**params), BaseBayesianRegressor
            )

    def test_inherits_base_bayesian(self):
        """A default-constructed estimator is a ``BaseBayesianRegressor``."""
        assert isinstance(BayesianRidgeRegressor(), BaseBayesianRegressor)


class TestPrior:
    """Checks for the ``Prior`` specification wrapper."""

    def test_prior_wraps_distribution(self):
        """``Prior`` keeps the given name and the very same distribution."""
        dist = Normal(mu=0, sigma=1)
        prior = Prior(dist, name="weights")
        assert prior.name == "weights"
        assert prior.distribution is dist

    def test_prior_rejects_non_distribution(self):
        """Non-distribution input raises a ``TypeError``."""
        with pytest.raises(TypeError, match="BaseDistribution"):
            Prior(42)

    def test_prior_repr(self):
        """The repr mentions both the distribution type and the name."""
        text = repr(Prior(Normal(mu=0, sigma=1), name="w"))
        assert "Normal" in text
        assert "w" in text