# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Bayesian GLM with Gaussian likelihood and conjugate priors."""

__author__ = ["arnavk23"]

import numpy as np

from skpro.distributions import Normal
from skpro.regression.base import BaseProbaRegressor


class BayesianConjugateGLMRegressor(BaseProbaRegressor):
    r"""Bayesian GLM with Gaussian likelihood and conjugate priors.

    Models the relationship between features :math:`X` and target :math:`y`
    with a Bayesian linear-Gaussian model and conjugate priors
    (multivariate normal on the coefficients, optionally Gamma on the noise
    precision).  Only the Gaussian/identity link is supported, which is the
    conjugate case.

    The model is:

    .. math::
        y = X \beta + \epsilon, \quad \epsilon \sim \mathcal{N}(0, \tau^{-1})

    Priors:

    .. math::
        \beta \sim \mathcal{N}(\mu_0, \Sigma_0), \quad
        \tau \sim \text{Gamma}(a_0, b_0)

    where :math:`\beta` is the coefficient vector (including the intercept if
    ``add_constant=True``), :math:`\mu_0`, :math:`\Sigma_0` are the prior mean
    and covariance, and :math:`\tau` is the noise precision.

    Posterior and predictive updates are fully analytic:

    - if ``noise_prior_shape`` and ``noise_prior_rate`` are both set, the
      posterior predictive is Student-t (Bishop Ch. 2.3.3, arXiv:1604.04434),
    - otherwise :math:`\tau` is treated as known (``noise_precision``) and the
      predictive is Normal.

    Parameters
    ----------
    coefs_prior_cov : 2D np.ndarray, optional
        Covariance matrix of the prior for intercept and coefficients.
        Must be positive-definite.  One of ``coefs_prior_cov``,
        ``coefs_prior_precision`` or ``ard=True`` must be provided, unless
        ``prior_type`` constructs the prior.
    coefs_prior_mu : np.ndarray column vector, optional
        Mean vector of the prior for intercept and coefficients.
        If not provided, assumed to be a column vector of zeroes.
    noise_precision : float, default=1
        Known precision of the Gaussian likelihood noise (inverse variance).
        Used whenever the Gamma noise prior is not set.
    add_constant : bool, default=True
        Whether to add an intercept column to X.
    coefs_prior_precision : 2D np.ndarray, optional
        Prior precision matrix for the coefficients; alternative
        parameterization to ``coefs_prior_cov``.
    ard : bool, default=False
        If True, build a diagonal prior precision from ``ard_lambda``
        (automatic-relevance-determination style prior).
    ard_lambda : 1D np.ndarray, optional
        Per-coefficient prior precisions; required when ``ard=True``.
    noise_prior_shape : float, optional
        Shape parameter (a_0) for the Gamma prior on the noise precision.
    noise_prior_rate : float, optional
        Rate parameter (b_0) for the Gamma prior on the noise precision.
    prior_type : str, optional
        Prior construction device.  ``"synthetic"`` uses an imaginary-data
        prior (Good's device), ``"gprior"`` uses Zellner's g-prior.
        Default is None (use the prior as given).
    prior_strength : float, default=1.0
        Pseudo-sample size of the synthetic prior.
        Only used if ``prior_type="synthetic"``.
    g : float, optional
        Zellner's g-prior hyperparameter.  Required when
        ``prior_type="gprior"``.

    Examples
    --------
    >>> from skpro.regression.bayesian._glm_conjugate import (
    ...     BayesianConjugateGLMRegressor,
    ... )
    >>> import numpy as np
    >>> n_features = 10
    >>> reg = BayesianConjugateGLMRegressor(
    ...     coefs_prior_cov=np.eye(n_features + 1),
    ...     coefs_prior_mu=np.zeros((n_features + 1, 1)),
    ...     noise_prior_shape=2.0,
    ...     noise_prior_rate=2.0,
    ...     add_constant=True,
    ... )

    References
    ----------
    Bishop, C. M. (2006). Pattern Recognition and Machine Learning. Springer.
    Anceschi et al. (2022). Bayesian conjugacy in probit, tobit, multinomial
    probit and extensions. arXiv:2206.08118
    Ghosh et al. (2016). Bayesian linear regression with Student-t
    assumptions. arXiv:1604.04434
    """

    _tags = {
        "object_type": "regressor_proba",
        "estimator_type": "regressor_proba",
        "authors": ["arnavk23"],
        "capability:multioutput": False,
        "capability:missing": True,
        "capability:update": True,
        "X_inner_mtype": "pd_DataFrame_Table",
        "y_inner_mtype": "pd_DataFrame_Table",
    }

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the parameter set; only "default" is distinguished.

        Returns
        -------
        list of dict
            Each dict contains parameters for a test instance.
        """
        n_features = 10
        # Parameter set 1: add_constant=True (11 coefs)
        n_coefs1 = n_features + 1
        params1 = {
            "add_constant": True,
            "coefs_prior_mu": np.zeros((n_coefs1, 1)),
            "coefs_prior_cov": np.eye(n_coefs1),
            "noise_precision": 1.0,
        }
        # Parameter set 2: add_constant=False (10 coefs)
        n_coefs2 = n_features
        params2 = {
            "add_constant": False,
            "coefs_prior_mu": np.ones((n_coefs2, 1)),
            "coefs_prior_cov": np.eye(n_coefs2) * 2,
            "noise_precision": 2.0,
        }
        return [params1, params2]

    def __init__(
        self,
        coefs_prior_cov=None,
        coefs_prior_mu=None,
        noise_precision=1,
        add_constant=True,
        coefs_prior_precision=None,
        ard=False,
        ard_lambda=None,
        noise_prior_shape=None,
        noise_prior_rate=None,
        prior_type=None,
        prior_strength=1.0,
        g=None,
    ):
        self.coefs_prior_cov = coefs_prior_cov
        self.coefs_prior_mu = coefs_prior_mu
        self.noise_precision = noise_precision
        self.add_constant = add_constant
        self.coefs_prior_precision = coefs_prior_precision
        self.ard = ard
        self.ard_lambda = ard_lambda
        self.noise_prior_shape = noise_prior_shape
        self.noise_prior_rate = noise_prior_rate
        self.prior_type = prior_type
        self.prior_strength = prior_strength
        self.g = g
        super().__init__()

    def _infer_n_coefs(self):
        """Infer the coefficient count from whichever prior input is given.

        Returns
        -------
        int or None
            Number of coefficients, or None if no prior input determines it.
        """
        if self.coefs_prior_cov is not None:
            return self.coefs_prior_cov.shape[0]
        if self.coefs_prior_precision is not None:
            return self.coefs_prior_precision.shape[0]
        if self.ard_lambda is not None:
            return len(self.ard_lambda)
        return None

    def _resolve_prior(self, X_arr):
        """Build (prior mean, covariance, precision) from constructor args.

        Handles the synthetic (imaginary-data) prior, Zellner's g-prior,
        ARD-style diagonal priors, and direct covariance/precision input.

        Parameters
        ----------
        X_arr : np.ndarray
            Design matrix after optional intercept augmentation; only used
            by the g-prior branch.

        Returns
        -------
        tuple of np.ndarray
            ``(mu0, cov0, prec0)`` — prior mean, covariance, precision.

        Raises
        ------
        ValueError
            If no prior inputs are given, or a required hyperparameter for
            the selected ``prior_type`` is missing.
        """
        n_coefs = self._infer_n_coefs()
        if self.prior_type == "synthetic":
            if n_coefs is None:
                raise ValueError("Cannot infer n_coefs for synthetic prior.")
            # Good's device: prior equivalent to `prior_strength` pseudo
            # observations of an identity design.
            pseudo_X = np.eye(n_coefs)
            pseudo_precision = self.prior_strength * self.noise_precision
            prec0 = pseudo_precision * (pseudo_X.T @ pseudo_X)
            cov0 = np.linalg.inv(prec0)
            mu0 = np.zeros((n_coefs, 1))
        elif self.prior_type == "gprior":
            if self.g is None:
                raise ValueError("Must provide g for g-prior.")
            if n_coefs is None:
                raise ValueError("Cannot infer n_coefs for g-prior.")
            XTX = X_arr.T @ X_arr
            # NOTE(review): scales by the coefficient count (XTX.shape[0]),
            # not the sample size — confirm this is the intended g-prior
            # convention.  Behavior kept as originally implemented.
            scale = XTX.shape[0]
            cov0 = (self.g / scale) * np.linalg.inv(XTX)
            prec0 = np.linalg.inv(cov0)
            mu0 = np.zeros((n_coefs, 1))
        elif (
            self.coefs_prior_cov is None
            and self.coefs_prior_precision is None
            and not self.ard
        ):
            raise ValueError(
                "Must provide prior covariance, precision, or set ard=True."
            )
        elif self.ard:
            if self.ard_lambda is None:
                raise ValueError(
                    "ard_lambda (array of prior precisions) must be provided for ARD."
                )
            prec0 = np.diag(self.ard_lambda)
            cov0 = np.linalg.inv(prec0)
            mu0 = np.zeros((len(self.ard_lambda), 1))
        elif self.coefs_prior_precision is not None:
            prec0 = self.coefs_prior_precision
            cov0 = np.linalg.inv(prec0)
            mu0 = (
                self.coefs_prior_mu
                if self.coefs_prior_mu is not None
                else np.zeros((cov0.shape[0], 1))
            )
        else:
            cov0 = self.coefs_prior_cov
            prec0 = np.linalg.inv(cov0)
            mu0 = (
                self.coefs_prior_mu
                if self.coefs_prior_mu is not None
                else np.zeros((cov0.shape[0], 1))
            )
        return mu0, cov0, prec0

    def _fit(self, X, y):
        """Fit the Bayesian GLM to data.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix.
        y : pd.DataFrame
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If prior mean and covariance dimensions do not match, or the
            prior specification is incomplete.
        """
        self._y_cols = y.columns
        X_df = X.copy()
        if self.add_constant:
            X_df = self._add_intercept(X_df)
        X_arr = X_df.to_numpy(dtype=float)
        y_arr = y.to_numpy(dtype=float)

        mu0, cov0, prec0 = self._resolve_prior(X_arr)
        if mu0.shape[0] != cov0.shape[0]:
            raise ValueError(
                "Dimensionality of `coefs_prior_mu` and `coefs_prior_cov` must match."
            )

        self._coefs_prior_mu = mu0
        self._coefs_prior_cov = cov0
        # reuse the already-resolved precision instead of re-inverting cov0
        self._coefs_prior_precision = prec0
        self._X_train = X_arr
        self._y_train = y_arr
        (
            self._coefs_posterior_mu,
            self._coefs_posterior_cov,
            self._noise_posterior_shape,
            self._noise_posterior_rate,
        ) = self._perform_bayesian_inference(X_arr, y_arr, mu0, prec0)
        return self

    def _predict_proba(self, X):
        """Return the posterior predictive distribution for input features.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix.

        Returns
        -------
        Normal or TDistribution
            Normal if the noise precision is treated as known; Student-t if
            a Gamma noise prior (shape and rate) was specified.
        """
        idx = X.index
        X_df = X.copy()
        if self.add_constant:
            X_df = self._add_intercept(X_df)
        X_arr = X_df.to_numpy(dtype=float)
        pred_mu = X_arr @ self._coefs_posterior_mu
        # x_i^T Sigma_N x_i for every row at once (replaces a Python loop)
        pred_var = np.einsum(
            "ij,jk,ik->i", X_arr, self._coefs_posterior_cov, X_arr
        )
        mus = pred_mu.reshape(-1, 1).tolist()
        if self.noise_prior_shape is not None and self.noise_prior_rate is not None:
            from skpro.distributions.t import TDistribution

            # Student-t predictive: df = 2 aN,
            # scale = sqrt(bN/aN * (1 + x^T Sigma_N x))
            nu = 2 * self._noise_posterior_shape
            pred_scale = np.sqrt(
                self._noise_posterior_rate
                / self._noise_posterior_shape
                * (1 + pred_var)
            )
            sigmas = pred_scale.reshape(-1, 1).tolist()
            return TDistribution(
                mu=mus, sigma=sigmas, df=nu, columns=self._y_cols, index=idx
            )
        pred_sigma = np.sqrt(pred_var + 1 / self.noise_precision)
        sigmas = pred_sigma.reshape(-1, 1).tolist()
        return Normal(mu=mus, sigma=sigmas, columns=self._y_cols, index=idx)

    def log_marginal_likelihood(self, X, y):
        """Compute the log marginal likelihood (model evidence).

        Uses the closed form for a Gaussian likelihood with known precision
        ``noise_precision`` and Gaussian coefficient prior (Bishop, PRML,
        eq. 3.86 generalized to arbitrary prior mean/covariance).  Requires
        the estimator to be fitted (prior attributes are set in ``_fit``).

        Parameters
        ----------
        X : pd.DataFrame or np.ndarray
            Feature matrix.
        y : pd.DataFrame or np.ndarray
            Target values.

        Returns
        -------
        float
            Log marginal likelihood (evidence).
        """
        import pandas as pd

        # Apply the same intercept logic used in _fit / _predict_proba
        if isinstance(X, pd.DataFrame):
            X_df = X.copy()
            if self.add_constant:
                X_df = self._add_intercept(X_df)
            X_arr = X_df.to_numpy(dtype=float)
        else:
            X_arr = np.array(X, dtype=float)
            if self.add_constant:
                X_arr = np.column_stack([np.ones(X_arr.shape[0]), X_arr])

        if isinstance(y, (np.ndarray, np.generic)):
            y_arr = y
        else:
            y_arr = y.to_numpy(dtype=float)

        N = X_arr.shape[0]
        S0 = self._coefs_prior_cov
        m0 = self._coefs_prior_mu
        tau = self.noise_precision
        S0_inv = np.linalg.inv(S0)
        SN_inv = S0_inv + tau * (X_arr.T @ X_arr)
        SN = np.linalg.inv(SN_inv)
        mN = SN @ (S0_inv @ m0 + tau * X_arr.T @ y_arr)
        # log evidence =
        #   -N/2 log(2 pi) + N/2 log(tau) + 1/2 (log|SN| - log|S0|)
        #   - tau/2 ||y - X mN||^2 - 1/2 (mN - m0)^T S0^{-1} (mN - m0)
        # Fix: the N/2 log(tau) term was previously missing, so the evidence
        # was only correct for noise_precision == 1.
        # slogdet avoids overflow/underflow of raw determinants.
        _, logdet_SN = np.linalg.slogdet(SN)
        _, logdet_S0 = np.linalg.slogdet(S0)
        term1 = -0.5 * N * np.log(2 * np.pi) + 0.5 * N * np.log(tau)
        term2 = 0.5 * (logdet_SN - logdet_S0)
        term3 = -0.5 * tau * np.sum((y_arr - X_arr @ mN) ** 2)
        term4 = -0.5 * ((mN - m0).T @ S0_inv @ (mN - m0)).item()
        return float(term1 + term2 + term3 + term4)

    def _perform_bayesian_inference(self, X, y, coefs_prior_mu, coefs_prior_precision):
        """Compute the posterior over coefficients (and noise precision).

        Conjugate updates per Bishop PRML eq. 3.50-3.51; the Gamma noise
        posterior follows Normal-Gamma conjugacy (arXiv:1604.04434).

        Parameters
        ----------
        X : np.ndarray
            Design matrix.
        y : np.ndarray
            Target values.
        coefs_prior_mu : np.ndarray
            Prior mean vector.
        coefs_prior_precision : np.ndarray
            Prior precision matrix.

        Returns
        -------
        coefs_posterior_mu : np.ndarray
            Posterior mean vector.
        coefs_posterior_cov : np.ndarray
            Posterior covariance matrix.
        noise_posterior_shape : float or None
            Gamma posterior shape a_N; None if no noise prior was set.
        noise_posterior_rate : float or None
            Gamma posterior rate b_N; None if no noise prior was set.
        """
        N = X.shape[0]
        m0 = coefs_prior_mu
        tau0 = self.noise_precision
        SN_inv = coefs_prior_precision + tau0 * (X.T @ X)
        SN = np.linalg.inv(SN_inv)
        mN = SN @ (coefs_prior_precision @ m0 + tau0 * X.T @ y)
        a0 = self.noise_prior_shape
        b0 = self.noise_prior_rate
        if a0 is None or b0 is None:
            return mN, SN, None, None
        aN = a0 + N / 2
        resid = y - X @ mN
        bN = (
            b0
            + 0.5 * (resid.T @ resid)
            + 0.5 * (mN - m0).T @ coefs_prior_precision @ (mN - m0)
        )
        return mN, SN, aN, bN.item()

    def _posterior_predictive_check(self, X=None, n_samples=100):
        """Generate posterior predictive samples for model criticism (PPC).

        Parameters
        ----------
        X : pd.DataFrame or np.ndarray, optional
            Feature matrix. If None, uses training data.
        n_samples : int, default=100
            Number of replicated datasets to generate.

        Returns
        -------
        np.ndarray
            Replicated datasets of shape (n_samples, n_obs, n_targets).
        """
        import pandas as pd

        if X is None:
            X = self._X_train
        elif isinstance(X, np.ndarray):
            # Reconstruct a DataFrame: reuse training column names if known
            if hasattr(self, "_y_cols"):
                columns = self._y_cols
            elif hasattr(self, "_X_train") and hasattr(self._X_train, "columns"):
                columns = self._X_train.columns
            else:
                columns = [f"x{i}" for i in range(X.shape[1])]
            X = pd.DataFrame(X, columns=columns)
        pred_dist = self._predict_proba(X)
        samples_df = pred_dist.sample(n_samples)
        n_obs = X.shape[0]
        n_targets = samples_df.shape[1]
        # samples_df stacks draws: (n_samples * n_obs, n_targets)
        return samples_df.to_numpy().reshape(n_samples, n_obs, n_targets)

    def _add_intercept(self, X):
        """Add an intercept column to the feature matrix if not present.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix.

        Returns
        -------
        pd.DataFrame
            Feature matrix with a leading "const" column of ones.
        """
        if "const" not in X.columns:
            X = X.copy()
            X.insert(0, "const", 1.0)
        return X

    def _update(self, X, y):
        """Online update of the model with new data.

        The current posterior becomes the prior for the new batch.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix of the new batch.
        y : pd.DataFrame
            Target values of the new batch.

        Returns
        -------
        self : object
            Updated estimator.
        """
        X_df = X.copy()
        if self.add_constant:
            X_df = self._add_intercept(X_df)
        X_arr = X_df.to_numpy(dtype=float)
        y_arr = y.to_numpy(dtype=float)
        prior_precision = np.linalg.inv(self._coefs_posterior_cov)
        mN, SN, aN, bN = self._perform_bayesian_inference(
            X_arr, y_arr, self._coefs_posterior_mu, prior_precision
        )
        self._coefs_posterior_mu = mN
        self._coefs_posterior_cov = SN
        if aN is not None:
            # Fix: previously the Gamma noise posterior was never refreshed
            # on update, so predict_proba kept using stale a_N/b_N.
            # Accumulate the increments over the prior hyperparameters.
            # NOTE(review): recursive approximation — residuals are taken
            # w.r.t. the batch posterior, not a joint refit; confirm this
            # matches the intended update semantics.
            self._noise_posterior_shape += aN - self.noise_prior_shape
            self._noise_posterior_rate += bN - self.noise_prior_rate
        return self
"""Tests for BayesianConjugateGLMRegressor.

Covers basic, g-prior, synthetic prior, and posterior predictive check
functionality.
"""

import numpy as np
import pandas as pd

from skpro.regression.bayesian._glm_conjugate import BayesianConjugateGLMRegressor


def _make_data(seed=0, n=20, p=2):
    """Return a small deterministic (X, y) pair.

    Fix: the original tests drew from the unseeded global np.random state,
    making failures non-reproducible.
    """
    rng = np.random.default_rng(seed)
    X = pd.DataFrame(rng.standard_normal((n, p)), columns=["feat1", "feat2"])
    y = pd.DataFrame(rng.standard_normal((n, 1)), columns=["target"])
    return X, y


def _check_fit_predict(est, X, y):
    """Fit est and assert point/probabilistic prediction shapes."""
    est.fit(X, y)
    y_pred = est.predict(X)
    y_pred_proba = est.predict_proba(X)
    assert y_pred.shape == y.shape
    assert hasattr(y_pred_proba, "mu")
    assert hasattr(y_pred_proba, "sigma")
    assert len(y_pred_proba.mu) == y.shape[0]
    assert len(y_pred_proba.sigma) == y.shape[0]


def test_bayesian_conjugate_glm_regressor():
    """Test basic fit/predict functionality for BayesianConjugateGLMRegressor."""
    X, y = _make_data(seed=0)
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),  # 2 features + intercept
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1.0,
        add_constant=True,
    )
    _check_fit_predict(est, X, y)


def test_bayesian_conjugate_glm_regressor_gprior():
    """Test g-prior support in BayesianConjugateGLMRegressor."""
    X, y = _make_data(seed=1)
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1.0,
        add_constant=True,
        prior_type="gprior",
        g=10.0,
    )
    _check_fit_predict(est, X, y)


def test_bayesian_conjugate_glm_regressor_synthetic_prior():
    """Test synthetic/imaginary-data prior in BayesianConjugateGLMRegressor."""
    X, y = _make_data(seed=2)
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1.0,
        add_constant=True,
        prior_type="synthetic",
        prior_strength=2.0,
    )
    _check_fit_predict(est, X, y)


def test_bayesian_conjugate_glm_regressor_ppc():
    """Test posterior predictive check method for BayesianConjugateGLMRegressor."""
    X, y = _make_data(seed=3)
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1.0,
        add_constant=True,
    )
    est.fit(X, y)
    samples = est._posterior_predictive_check(X, n_samples=10)
    assert samples.shape[0] == 10
    assert samples.shape[1] == X.shape[0]
"""Comprehensive tests for BayesianConjugateGLMRegressor.

Covers:
- Posterior math correctness (Bishop PRML Ch. 3)
- Student-t predictive path (with noise priors)
- Online update (_update)
- Edge cases: dimension mismatch, missing g, ARD
- Numerical stability
- log_marginal_likelihood
"""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

import numpy as np
import pandas as pd
import pytest

from skpro.regression.bayesian._glm_conjugate import BayesianConjugateGLMRegressor

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


def _make_Xy(n=30, p=2, add_const=True, rng=None):
    """Return a simple (X, y) pair for testing.

    Fix: a fresh seeded generator is created per call (instead of a shared
    module-level generator) so each test's data is deterministic regardless
    of test execution order.
    """
    rng = rng or np.random.default_rng(42)
    X = pd.DataFrame(rng.standard_normal((n, p)), columns=[f"x{i}" for i in range(p)])
    y = pd.DataFrame(rng.standard_normal(n), columns=["target"])
    return X, y


def _make_est(n_features=2, add_constant=True, **kwargs):
    """Return a minimal BayesianConjugateGLMRegressor with identity prior."""
    n_coefs = n_features + int(add_constant)
    defaults = dict(
        coefs_prior_cov=np.eye(n_coefs),
        coefs_prior_mu=np.zeros((n_coefs, 1)),
        noise_precision=1.0,
        add_constant=add_constant,
    )
    defaults.update(kwargs)
    return BayesianConjugateGLMRegressor(**defaults)


# ---------------------------------------------------------------------------
# 1. Posterior math correctness (Bishop PRML eq. 3.50-3.51)
# ---------------------------------------------------------------------------


def test_posterior_mean_formula():
    """Posterior mean must equal closed-form Bishop eq. 3.50."""
    X, y = _make_Xy(n=30, p=2, add_const=True)
    n_coefs = 3
    S0 = np.eye(n_coefs) * 2.0
    m0 = np.ones((n_coefs, 1)) * 0.5
    tau = 1.5

    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=S0,
        coefs_prior_mu=m0,
        noise_precision=tau,
        add_constant=True,
    )
    est.fit(X, y)

    # Re-derive manually
    X_arr = np.column_stack([np.ones(len(X)), X.values])
    y_arr = y.values
    S0_inv = np.linalg.inv(S0)
    SN_inv = S0_inv + tau * X_arr.T @ X_arr
    SN = np.linalg.inv(SN_inv)
    mN = SN @ (S0_inv @ m0 + tau * X_arr.T @ y_arr)

    np.testing.assert_allclose(
        est._coefs_posterior_mu,
        mN,
        rtol=1e-8,
        atol=1e-10,
        err_msg="Posterior mean does not match Bishop eq. 3.50",
    )


def test_posterior_covariance_formula():
    """Posterior covariance must equal closed-form Bishop eq. 3.51."""
    X, y = _make_Xy(n=30, p=2, add_const=True)
    n_coefs = 3
    S0 = np.eye(n_coefs)
    tau = 2.0

    est = _make_est(n_features=2, add_constant=True)
    est.noise_precision = tau
    est.coefs_prior_cov = S0
    est.fit(X, y)

    X_arr = np.column_stack([np.ones(len(X)), X.values])
    SN_inv = np.linalg.inv(S0) + tau * X_arr.T @ X_arr
    SN_expected = np.linalg.inv(SN_inv)

    np.testing.assert_allclose(
        est._coefs_posterior_cov,
        SN_expected,
        rtol=1e-8,
        atol=1e-10,
        err_msg="Posterior covariance does not match Bishop eq. 3.51",
    )


def test_noise_posterior_shape_update():
    """aN = a0 + N/2 per Normal-Gamma conjugacy."""
    X, y = _make_Xy(n=25, p=2)
    a0, b0 = 2.0, 3.0
    est = _make_est(
        n_features=2,
        noise_prior_shape=a0,
        noise_prior_rate=b0,
    )
    est.fit(X, y)

    expected_aN = a0 + len(X) / 2
    assert est._noise_posterior_shape == pytest.approx(expected_aN, rel=1e-9)


# ---------------------------------------------------------------------------
# 2. Predictive distribution types
# ---------------------------------------------------------------------------


def test_predict_proba_returns_normal_without_noise_priors():
    """Without noise priors, predict_proba must return a Normal distribution."""
    from skpro.distributions.normal import Normal

    X, y = _make_Xy()
    est = _make_est()
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert isinstance(dist, Normal), f"Expected Normal, got {type(dist)}"


def test_predict_proba_returns_tdistribution_with_noise_priors():
    """With noise priors, predict_proba must return a TDistribution."""
    from skpro.distributions.t import TDistribution

    X, y = _make_Xy()
    est = _make_est(noise_prior_shape=2.0, noise_prior_rate=2.0)
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert isinstance(dist, TDistribution), f"Expected TDistribution, got {type(dist)}"


def test_tdistribution_degrees_of_freedom():
    """TDistribution df must equal 2 * aN."""
    from skpro.distributions.t import TDistribution

    X, y = _make_Xy(n=20)
    a0 = 3.0
    est = _make_est(noise_prior_shape=a0, noise_prior_rate=2.0)
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert isinstance(dist, TDistribution)

    expected_df = 2 * est._noise_posterior_shape
    # df is stored as a scalar or array — extract a representative value
    df_val = np.asarray(dist.df).flat[0]
    assert df_val == pytest.approx(expected_df, rel=1e-9)


def test_predict_proba_mean_matches_posterior_mean():
    """Predictive mean must equal X @ coefs_posterior_mu."""
    X, y = _make_Xy(n=30, p=2)
    est = _make_est()
    est.fit(X, y)

    pred_mean = est.predict_proba(X).mean()
    X_arr = np.column_stack([np.ones(len(X)), X.values])
    expected = pd.DataFrame(
        X_arr @ est._coefs_posterior_mu,
        index=X.index,
        columns=["target"],
    )
    np.testing.assert_allclose(
        pred_mean.values,
        expected.values,
        rtol=1e-7,
        atol=1e-9,
        err_msg="Predictive mean does not equal X @ posterior_mu",
    )


def test_predict_proba_finite_outputs():
    """pdf and log_pdf must be finite for all predictions."""
    X, y = _make_Xy(n=20)
    est = _make_est()
    est.fit(X, y)
    dist = est.predict_proba(X)
    x_test = pd.DataFrame({"target": np.ones(len(X))}, index=X.index)
    assert np.isfinite(dist.pdf(x_test).values).all()
    assert np.isfinite(dist.log_pdf(x_test).values).all()


# ---------------------------------------------------------------------------
# 3. Online update
# ---------------------------------------------------------------------------


def test_online_update_shifts_posterior_toward_data():
    """After an update with additional data, the posterior mean should change."""
    X1, y1 = _make_Xy(n=20, p=2)
    X2, y2 = _make_Xy(n=20, p=2)

    est = _make_est()
    est.fit(X1, y1)
    mu_before = est._coefs_posterior_mu.copy()

    est._update(X2, y2)
    mu_after = est._coefs_posterior_mu

    # Posterior must have changed
    assert not np.allclose(
        mu_before, mu_after
    ), "Posterior mean did not change after _update"


def test_batch_vs_sequential_update_equivalence():
    """Fitting on X1+X2 at once equals fit(X1) then update(X2)."""
    rng = np.random.default_rng(7)
    X1, y1 = _make_Xy(n=15, p=2, rng=rng)
    X2, y2 = _make_Xy(n=15, p=2, rng=rng)

    # Sequential
    est_seq = _make_est()
    est_seq.fit(X1, y1)
    est_seq._update(X2, y2)

    # Batch
    X_all = pd.concat([X1, X2], ignore_index=True)
    y_all = pd.concat([y1, y2], ignore_index=True)
    est_batch = _make_est()
    est_batch.fit(X_all, y_all)

    np.testing.assert_allclose(
        est_seq._coefs_posterior_mu,
        est_batch._coefs_posterior_mu,
        rtol=1e-7,
        atol=1e-9,
        err_msg="Sequential update != batch fit (posterior mean mismatch)",
    )
    np.testing.assert_allclose(
        est_seq._coefs_posterior_cov,
        est_batch._coefs_posterior_cov,
        rtol=1e-7,
        atol=1e-9,
        err_msg="Sequential update != batch fit (posterior cov mismatch)",
    )


# ---------------------------------------------------------------------------
# 4. Edge cases and input validation
# ---------------------------------------------------------------------------


def test_add_constant_false():
    """add_constant=False: no intercept column added, shapes must be consistent."""
    X, y = _make_Xy(n=20, p=3)
    est = _make_est(n_features=3, add_constant=False)
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert dist.mean().shape == (len(X), 1)


def test_gprior_requires_g_parameter():
    """prior_type='gprior' without g must raise ValueError."""
    X, y = _make_Xy()
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1.0,
        prior_type="gprior",
        g=None,
    )
    with pytest.raises(ValueError, match="g"):
        est.fit(X, y)


def test_ard_mode():
    """ARD mode: ard=True with ard_lambda produces valid predictions."""
    X, y = _make_Xy(n=20, p=2)
    n_coefs = 3  # 2 features + intercept
    est = BayesianConjugateGLMRegressor(
        ard=True,
        ard_lambda=np.ones(n_coefs),
        noise_precision=1.0,
        add_constant=True,
    )
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert dist.mean().shape == (len(X), 1)


def test_dimension_mismatch_raises():
    """coefs_prior_mu and coefs_prior_cov size mismatch must raise ValueError."""
    X, y = _make_Xy()
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3),
        coefs_prior_mu=np.zeros((5, 1)),  # wrong size
        noise_precision=1.0,
        add_constant=True,
    )
    with pytest.raises(ValueError, match="Dimensionality"):
        est.fit(X, y)


def test_prior_via_precision_matrix():
    """Specifying coefs_prior_precision directly must give same result as via cov."""
    X, y = _make_Xy(n=25, p=2)
    S0 = np.eye(3) * 3.0
    S0_inv = np.linalg.inv(S0)
    m0 = np.zeros((3, 1))

    est_cov = BayesianConjugateGLMRegressor(
        coefs_prior_cov=S0,
        coefs_prior_mu=m0,
        noise_precision=1.0,
        add_constant=True,
    )
    est_prec = BayesianConjugateGLMRegressor(
        coefs_prior_precision=S0_inv,
        coefs_prior_mu=m0,
        noise_precision=1.0,
        add_constant=True,
    )
    est_cov.fit(X, y)
    est_prec.fit(X, y)

    np.testing.assert_allclose(
        est_cov._coefs_posterior_mu,
        est_prec._coefs_posterior_mu,
        rtol=1e-8,
        atol=1e-10,
        err_msg="Prior via cov and via precision must yield same posterior",
    )


# ---------------------------------------------------------------------------
# 5. Numerical stability
# ---------------------------------------------------------------------------


def test_large_dataset_finite_outputs():
    """N=500 should produce finite posterior and finite predictive pdf."""
    X, y = _make_Xy(n=500, p=4)
    est = _make_est(n_features=4)
    est.fit(X, y)
    assert np.isfinite(est._coefs_posterior_mu).all()
    assert np.isfinite(est._coefs_posterior_cov).all()

    dist = est.predict_proba(X.iloc[:10])
    x_test = pd.DataFrame({"target": np.ones(10)}, index=X.index[:10])
    assert np.isfinite(dist.pdf(x_test).values).all()


def test_small_noise_precision_finite():
    """Very small noise_precision (near-uninformative) must not produce NaN."""
    # Fix: removed a dead estimator instance that was built and mutated
    # but never used in the original test.
    X, y = _make_Xy(n=20, p=2)
    est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3) * 10,
        coefs_prior_mu=np.zeros((3, 1)),
        noise_precision=1e-8,
        add_constant=True,
    )
    est.fit(X, y)
    dist = est.predict_proba(X)
    assert np.isfinite(dist.mean().values).all()


# ---------------------------------------------------------------------------
# 6. log_marginal_likelihood
# ---------------------------------------------------------------------------


def test_log_marginal_likelihood_is_finite():
    """log_marginal_likelihood must return a finite scalar."""
    X, y = _make_Xy(n=20, p=2)
    est = _make_est()
    est.fit(X, y)
    lml = est.log_marginal_likelihood(X, y)
    assert np.isfinite(lml), f"log_marginal_likelihood is not finite: {lml}"


def test_log_marginal_likelihood_model_comparison():
    """True generative model should have higher evidence than random model.

    We generate y from a known beta, then compare LML of correct prior
    centred on true beta vs a very wrong prior.
    """
    rng = np.random.default_rng(99)
    n, p = 50, 2
    true_beta = np.array([[1.0], [2.0], [-1.0]])  # intercept + 2 weights
    X_raw = rng.standard_normal((n, p))
    X_aug = np.column_stack([np.ones(n), X_raw])
    y_raw = X_aug @ true_beta + rng.standard_normal((n, 1)) * 0.3

    X = pd.DataFrame(X_raw, columns=["x0", "x1"])
    y = pd.DataFrame(y_raw, columns=["target"])

    # Good model: prior centred on true beta
    good_est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3) * 0.5,
        coefs_prior_mu=true_beta,
        noise_precision=1.0,
        add_constant=True,
    )
    good_est.fit(X, y)

    # Bad model: prior far from truth
    bad_est = BayesianConjugateGLMRegressor(
        coefs_prior_cov=np.eye(3) * 0.5,
        coefs_prior_mu=np.full((3, 1), 100.0),
        noise_precision=1.0,
        add_constant=True,
    )
    bad_est.fit(X, y)

    lml_good = good_est.log_marginal_likelihood(X, y)
    lml_bad = bad_est.log_marginal_likelihood(X, y)

    assert (
        lml_good > lml_bad
    ), f"Good model LML ({lml_good:.2f}) <= bad model LML ({lml_bad:.2f})"