Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
10f6dbb
Fix: robustify UnconditionalDistfitRegressor for distfit 2.0.1
arnavk23 Mar 25, 2026
0ae1b7e
pre-commit
arnavk23 Mar 25, 2026
c7cfccd
fix failing tests
arnavk23 Mar 25, 2026
fcf5496
black
arnavk23 Mar 25, 2026
f2c1768
get_params
arnavk23 Mar 25, 2026
8b56b98
doctest
arnavk23 Mar 25, 2026
34a76d8
removing faults
arnavk23 Mar 25, 2026
0ff147a
blocking kde usage due to scipy.stats.kde deprecation in distfit, see
arnavk23 Mar 25, 2026
1cd3365
pre-commit
arnavk23 Mar 25, 2026
3c99ced
fit_kde = False
arnavk23 Mar 25, 2026
92ce29a
fixing kde parameter in UnconditionalDistfitRegressor and related tes…
arnavk23 Mar 25, 2026
3ce9aaa
kde support is removed due to scipy.stats.kde deprecation in distfit.
arnavk23 Mar 25, 2026
05a8cbf
black
arnavk23 Mar 25, 2026
f57dfd4
trying to fix failing checks
arnavk23 Mar 25, 2026
d42ee14
changes
arnavk23 Mar 25, 2026
1f3d9a5
pre-commit
arnavk23 Mar 25, 2026
da3e088
mean: Update unconditional_distfit.py to handle dict models with 'loc…
arnavk23 Mar 25, 2026
bfce196
fixing failed output check
arnavk23 Mar 25, 2026
269f59b
flake
arnavk23 Mar 25, 2026
b65d7b8
Update unconditional_distfit.py
arnavk23 Mar 25, 2026
675ecb0
Fix doctest and flake8 import for UnconditionalDistfitRegressor; use …
arnavk23 Mar 26, 2026
31556e5
Merge branch 'feature/baseline-unconditional-densities' of https://ww…
arnavk23 Mar 26, 2026
9b18283
tags
arnavk23 Mar 26, 2026
8a6cfb7
pre-commit
arnavk23 Mar 28, 2026
6c817af
fixing further issues with the baseline regressors, and adding a test…
arnavk23 Apr 11, 2026
5933e56
Merge branch 'feature/baseline-unconditional-densities' of https://ww…
arnavk23 Apr 11, 2026
2eb7949
pre-commit
arnavk23 Apr 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions docs/source/api_reference/regression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,21 @@ This section lists simple regressors which can be used as baselines.
DeltaPointRegressor
DummyProbaRegressor

.. currentmodule:: skpro.regression.unconditional_distfit

.. autosummary::
:toctree: auto_generated/
:template: class.rst

UnconditionalDistfitRegressor

.. currentmodule:: skpro.regression.deterministic_reduction

.. autosummary::
:toctree: auto_generated/
:template: class.rst

DeterministicReductionRegressor

Linear regression
-----------------
Expand Down
27 changes: 27 additions & 0 deletions examples/baseline_regressors_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""Example usage for baseline probabilistic regressors."""
import logging

import numpy as np
from sklearn.linear_model import LinearRegression

from skpro.regression.deterministic_reduction import DeterministicReductionRegressor
from skpro.regression.unconditional_distfit import UnconditionalDistfitRegressor

# Generate synthetic data
X = np.random.randn(100, 3)
y = 2 * X[:, 0] + np.random.randn(100)

# 1. Unconditional density baseline (featureless)
reg1 = UnconditionalDistfitRegressor()
reg1.fit(X, y)
dist1 = reg1.predict_proba(X)
logging.info("UnconditionalDistfitRegressor mean: %s", dist1.mean())
logging.info("Sample from unconditional: %s", dist1.sample(5))

# 2. Deterministic-style baseline (mean from regressor, constant variance)
reg2 = DeterministicReductionRegressor(LinearRegression(), distr_type="gaussian")
reg2.fit(X, y)
dist2 = reg2.predict_proba(X)
logging.info("DeterministicReductionRegressor mean: %s", dist2.mean)
logging.info("DeterministicReductionRegressor sigma: %s", dist2.sigma)
logging.info("Sample from deterministic baseline: %s", dist2.sample(5))
21 changes: 21 additions & 0 deletions examples/baseline_regressors_kde_hist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Example: unconditional norm and laplace baselines with distfit."""
import logging

import numpy as np

from skpro.regression.unconditional_distfit import UnconditionalDistfitRegressor

X = np.random.randn(80, 2)
y = np.random.randn(80)

# Distfit norm baseline
reg_norm = UnconditionalDistfitRegressor(distr_type="norm")
reg_norm.fit(X, y)
dist_norm = reg_norm.predict_proba(X)
logging.info("Norm baseline mean: %s", dist_norm.mean())

# Distfit laplace baseline
reg_laplace = UnconditionalDistfitRegressor(distr_type="laplace")
reg_laplace.fit(X, y)
dist_laplace = reg_laplace.predict_proba(X)
logging.info("Laplace baseline mean: %s", dist_laplace.mean())
40 changes: 40 additions & 0 deletions examples/benchmark_baseline_regressors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Benchmark script for baseline probabilistic regressors."""
import logging

import numpy as np
from sklearn.linear_model import LinearRegression

from skpro.metrics import PinballLoss
from skpro.regression.deterministic_reduction import DeterministicReductionRegressor
from skpro.regression.unconditional_distfit import UnconditionalDistfitRegressor

# Generate synthetic data
X = np.random.randn(200, 5)
y = 3 * X[:, 0] - 2 * X[:, 1] + np.random.randn(200)

# Split
X_train, X_test = X[:150], X[150:]
y_train, y_test = y[:150], y[150:]

# Baseline 1: Unconditional
reg1 = UnconditionalDistfitRegressor()
reg1.fit(X_train, y_train)
dist1 = reg1.predict_proba(X_test)

# Baseline 2: Deterministic reduction
reg2 = DeterministicReductionRegressor(LinearRegression(), distr_type="gaussian")
reg2.fit(X_train, y_train)
dist2 = reg2.predict_proba(X_test)

# Evaluate pinball loss at alpha=0.1, 0.5, 0.9
alphas = [0.1, 0.5, 0.9]
for alpha in alphas:
loss1 = PinballLoss(alpha=alpha)(y_test, dist1)
loss2 = PinballLoss(alpha=alpha)(y_test, dist2)
logging.info(
"Alpha=%s: UnconditionalDistfitRegressor pinball loss=%.4f, "
"DeterministicReductionRegressor pinball loss=%.4f",
alpha,
loss1,
loss2,
)
3 changes: 3 additions & 0 deletions skpro/distributions/tests/test_proba_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ def test_proba_index_coercion():
@pytest.mark.parametrize("fun", ["pdf", "ppf", "cdf"])
def test_proba_plotting(fun):
"""Test that plotting functions do not crash and return ax as expected."""
import matplotlib

matplotlib.use("Agg", force=True)
from matplotlib.axes import Axes
from matplotlib.figure import Figure

Expand Down
4 changes: 4 additions & 0 deletions skpro/regression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
MapieCrossConformalRegressor,
MapieSplitConformalRegressor,
)
from skpro.regression.deterministic_reduction import DeterministicReductionRegressor
from skpro.regression.jackknife import MapieJackknifeAfterBootstrapRegressor
from skpro.regression.nonparametric import NadarayaWatsonCDE
from skpro.regression.unconditional_distfit import UnconditionalDistfitRegressor

# Public API of skpro.regression: names exported on star-import.
__all__ = [
    "DeterministicReductionRegressor",
    "MapieSplitConformalRegressor",
    "MapieCrossConformalRegressor",
    "MapieConformalizedQuantileRegressor",
    "MapieJackknifeAfterBootstrapRegressor",
    "NadarayaWatsonCDE",
    "UnconditionalDistfitRegressor",
]
150 changes: 150 additions & 0 deletions skpro/regression/deterministic_reduction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""Deterministic regression reduction baseline.

Outputs Gaussian (or Laplace) with mean=prediction, var=training sample var.
"""

import numpy as np

from skpro.distributions.laplace import Laplace
from skpro.distributions.normal import Normal
from skpro.regression.base import BaseProbaRegressor


class DeterministicReductionRegressor(BaseProbaRegressor):
    """
    Wraps a deterministic regressor to output a Gaussian or Laplace.

    The output has mean=prediction, var=training sample var.
    Multi-output y is not supported (raises NotImplementedError).

    Parameters
    ----------
    regressor : sklearn-compatible point regressor
        Fitted via ``fit`` (cloned first); its predictions supply the
        location of the predictive distribution.
    distr_type : str, one of {"gaussian", "laplace"}, default="gaussian"
        Family of the predictive distribution. The spread is derived from
        the variance of the training targets.

    Examples
    --------
    >>> from sklearn.linear_model import LinearRegression
    >>> from skpro.regression.deterministic_reduction import (
    ...     DeterministicReductionRegressor
    ... )
    >>> import pandas as pd
    >>> X = pd.DataFrame({"a": [1, 2, 3]})
    >>> y = pd.DataFrame([1, 2, 3])
    >>> reg = DeterministicReductionRegressor(
    ...     LinearRegression(),
    ...     distr_type="gaussian"
    ... )
    >>> reg.fit(X, y)  # doctest: +ELLIPSIS
    DeterministicReductionRegressor(...)
    >>> dist = reg.predict_proba(X)
    >>> dist.mean()  # doctest: +NORMALIZE_WHITESPACE
         0
    0  1.0
    1  2.0
    2  3.0

    References
    ----------
    - Gaussian Processes for State Space Models and Change Point Detection
      (Turner, 2011 thesis). https://mlg.eng.cam.ac.uk/pub/pdf/Tur11.pdf
    - A Probabilistic View of Linear Regression
      (Bishop, PRML; Keng, 2016; various tutorials).
    - mlr3proba and related probabilistic ML frameworks.
    - Efficient and Distance-Aware Deep Regressor for Uncertainty Quantification
      (Bui et al., 2024).
      https://proceedings.mlr.press/v238/manh-bui24a/manh-bui24a.pdf
    """

    _tags = {
        "authors": ["arnavk23"],
        "estimator_type": "regressor_proba",
        # estimator tags
        # --------------
        "capability:multioutput": False,
        "capability:missing": True,
        "X_inner_mtype": "pd_DataFrame_Table",
        "y_inner_mtype": "pd_DataFrame_Table",
    }

    def __init__(self, regressor, distr_type="gaussian"):
        # validate eagerly so a misconfigured estimator fails at
        # construction rather than at fit time
        allowed_types = ["gaussian", "laplace"]
        if distr_type not in allowed_types:
            raise ValueError(
                f"distr_type must be one of {allowed_types}, got {distr_type}"
            )
        self.regressor = regressor
        self.distr_type = distr_type
        super().__init__()

    def _fit(self, X, y, C=None):
        """Fit a clone of the wrapped regressor and record training variance.

        Parameters
        ----------
        X : pandas DataFrame (or array-like, coerced)
            Feature table.
        y : pandas DataFrame (or array-like, coerced)
            Target; must have exactly one column.
        C : ignored
            Present for interface compatibility only.

        Returns
        -------
        self : reference to self
        """
        # Ensure X and y are DataFrames with string column names, since
        # some sklearn estimators reject non-string column labels
        import pandas as pd
        from sklearn.base import clone

        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X)
        X = X.copy()
        X.columns = [str(col) for col in X.columns]
        if not isinstance(y, pd.DataFrame):
            y = pd.DataFrame(y)
        y = y.copy()
        y.columns = [str(col) for col in y.columns]
        if y.shape[1] > 1:
            raise NotImplementedError(
                "DeterministicReductionRegressor only supports univariate y. "
                f"Got shape: {y.shape}"
            )
        # remember schema so predict_proba can rebuild matching output
        self._X_cols = X.columns
        self._y_cols = y.columns
        self._X_index = X.index
        self._y_index = y.index
        # Clone the regressor to avoid mutating the parameter
        self.regressor_ = clone(self.regressor)
        # sklearn point regressors expect 1D y for single-output problems
        self.regressor_ = self.regressor_.fit(
            X, y.values.ravel() if y.shape[1] == 1 else y
        )
        y_arr = y.values.flatten()
        self.train_mean_ = np.mean(y_arr)
        # NOTE: np.var default is ddof=0, i.e. the population variance
        self.train_var_ = np.var(y_arr)
        return self

    def _predict_proba(self, X):
        """Predict a distribution per row: mean from regressor, constant spread.

        Parameters
        ----------
        X : pandas DataFrame (or array-like, coerced)
            Feature table to predict for.

        Returns
        -------
        skpro Normal or Laplace distribution, indexed like ``X``,
        columns as the training ``y``.
        """
        import pandas as pd

        # Ensure X is a DataFrame with string column names
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X, columns=self._X_cols)
        X = X.copy()
        X.columns = [str(col) for col in X.columns]
        mean_pred = self.regressor_.predict(X)
        # Ensure output shape matches y (n_samples, 1)
        if mean_pred.ndim == 1:
            mean_pred = mean_pred.reshape(-1, 1)
        # Return distribution with correct index/columns
        if self.distr_type == "gaussian":
            return Normal(
                mu=mean_pred,
                sigma=np.sqrt(self.train_var_),
                index=X.index,
                columns=self._y_cols,
            )
        if self.distr_type == "laplace":
            # Laplace variance is 2 * scale**2, hence scale = sqrt(var/2)
            return Laplace(
                mu=mean_pred,
                scale=np.sqrt(self.train_var_ / 2),
                index=X.index,
                columns=self._y_cols,
            )
        # unreachable for instances built via __init__, kept as a guard
        raise ValueError(f"Unknown distr_type: {self.distr_type}")

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : bool, default=True
            If True, also include parameters of the nested ``regressor``,
            keyed as ``regressor__<param>`` per the sklearn convention, so
            that ``set_params`` and grid search can address them.

        Returns
        -------
        dict
            Parameter names mapped to their values.
        """
        # Only return true hyperparameters, not fitted attributes
        params = {"regressor": self.regressor, "distr_type": self.distr_type}
        # honor deep=True: previous implementation dropped nested params,
        # breaking the sklearn get_params/set_params contract
        if deep and hasattr(self.regressor, "get_params"):
            for key, value in self.regressor.get_params(deep=True).items():
                params[f"regressor__{key}"] = value
        return params

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter sets for automated tests."""
        from sklearn.linear_model import LinearRegression

        return [
            {"regressor": LinearRegression(), "distr_type": "gaussian"},
            {"regressor": LinearRegression(), "distr_type": "laplace"},
        ]
Loading
Loading