Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/api_reference/regression.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Composition

Pipeline
TransformedTargetRegressor
TargetTransform
DistrPredictiveCalibration

Model selection and tuning
--------------------------
Expand Down
8 changes: 8 additions & 0 deletions skpro/distributions/tests/test_proba_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,13 @@ def test_proba_index_coercion():
@pytest.mark.parametrize("fun", ["pdf", "ppf", "cdf"])
def test_proba_plotting(fun):
"""Test that plotting functions do not crash and return ax as expected."""
import matplotlib

matplotlib.use("Agg", force=True)

from matplotlib.axes import Axes
from matplotlib.figure import Figure
from matplotlib.pyplot import close

from skpro.distributions.normal import Normal

Expand All @@ -125,6 +130,7 @@ def test_proba_plotting(fun):
assert ax.shape == n.shape
assert all([isinstance(a, Axes) for a in ax.flatten()])
assert all([a.get_figure() == fig for a in ax.flatten()])
close(fig)

# 1D case requires special treatment of axes
n = Normal(mu=[[1], [2], [3]], sigma=1)
Expand All @@ -134,11 +140,13 @@ def test_proba_plotting(fun):
assert ax.shape == (n.shape[0],)
assert all([isinstance(a, Axes) for a in ax.flatten()])
assert all([a.get_figure() == fig for a in ax.flatten()])
close(fig)

# scalar case
n = Normal(mu=1, sigma=1)
ax = n.plot(fun=fun)
assert isinstance(ax, Axes)
close(ax.figure)


@pytest.mark.skip(reason="Undiagnosed failure. Skipping until resolved. See #918.")
Expand Down
6 changes: 6 additions & 0 deletions skpro/regression/compose/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@

from skpro.regression.compose._pipeline import Pipeline
from skpro.regression.compose._ttr import TransformedTargetRegressor
from skpro.regression.compose.distr_predictive_calibration import (
DistrPredictiveCalibration,
)
from skpro.regression.compose.target_transform import TargetTransform

__all__ = [
"Pipeline",
"TransformedTargetRegressor",
"TargetTransform",
"DistrPredictiveCalibration",
]
134 changes: 134 additions & 0 deletions skpro/regression/compose/distr_predictive_calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Implements predictive target calibration for probabilistic regression."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["arnavk23"]
__all__ = ["DistrPredictiveCalibration"]

from sklearn.base import BaseEstimator

from skpro.regression.base import BaseProbaRegressor


class _IdentityCalibrator(BaseEstimator):
"""Identity calibrator used in estimator checks for test instance creation.

Kept at module scope so sklearn cloning and serialization remain robust in tests.
"""

def fit(self, y_true, y_pred):
return self

def transform(self, y_pred):
return y_pred


class _ScaleOnlyCalibrator(BaseEstimator):
"""Simple deterministic calibrator for estimator check parametrization."""

def __init__(self, scale=1.1):
self.scale = scale

def fit(self, y_true, y_pred):
return self

def transform(self, y_pred):
return y_pred


class DistrPredictiveCalibration(BaseProbaRegressor):
"""DistrPredictiveCalibration pipeline for predictive target calibration.

Wraps a probabilistic regressor and applies a calibration method
to its predicted distributions.

Parameters
----------
regressor : BaseProbaRegressor
The probabilistic regressor to wrap.
calibrator : object
The calibration method to apply to predicted distributions.
Must implement fit(y_true, y_pred) and transform(y_pred).

Examples
--------
>>> from skpro.regression.compose import DistrPredictiveCalibration
>>> from skpro.regression.residual import ResidualDouble
>>> from sklearn.datasets import load_diabetes
>>> from sklearn.model_selection import train_test_split
>>> import pandas as pd
>>> # Dummy calibrator for demonstration
>>> from sklearn.base import BaseEstimator, TransformerMixin
>>> class DummyCalibrator(BaseEstimator, TransformerMixin):
... def fit(self, y_true, y_pred):
... return self
... def transform(self, y_pred):
... return y_pred
>>> # Load data
>>> X, y = load_diabetes(return_X_y=True, as_frame=True)
>>> y = pd.DataFrame(y)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y)
>>> reg = ResidualDouble.create_test_instance()
>>> cal = DummyCalibrator()
>>> calreg = DistrPredictiveCalibration(regressor=reg, calibrator=cal)
>>> calreg.fit(X_train, y_train)
DistrPredictiveCalibration(...)
>>> y_pred = calreg.predict(X_test)
>>> y_pred_proba = calreg.predict_proba(X_test)
>>> # Note: Calibrator must accept and return distribution objects
>>> # as output from predict_proba.
"""

_tags = {
"capability:multioutput": True,
"capability:missing": True,
}

def __init__(self, regressor, calibrator):
self.regressor = regressor
self.calibrator = calibrator
super().__init__()

def _fit(self, X, y, C=None):
from sklearn.base import clone

# Clone regressor and calibrator to avoid mutating input parameters
self._fitted_regressor = clone(self.regressor)
self._fitted_regressor.fit(X, y, C=C)
self._fitted_calibrator = clone(self.calibrator)
# Fit calibrator on training predictions
y_pred = self._fitted_regressor.predict_proba(X)
self._fitted_calibrator.fit(y, y_pred)
return self

def _predict(self, X):
return self._fitted_regressor.predict(X)

def _predict_quantiles(self, X, alpha):
y_pred = self._fitted_regressor.predict_quantiles(X, alpha)
return self._fitted_calibrator.transform(y_pred)

def _predict_interval(self, X, coverage):
y_pred = self._fitted_regressor.predict_interval(X, coverage)
return self._fitted_calibrator.transform(y_pred)

def _predict_var(self, X):
y_pred = self._fitted_regressor.predict_var(X)
return self._fitted_calibrator.transform(y_pred)

def _predict_proba(self, X):
y_pred = self._fitted_regressor.predict_proba(X)
return self._fitted_calibrator.transform(y_pred)

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter sets for automated tests.

Uses explicit calibrators to exercise constructor and set/get param checks.
"""
from skpro.regression.residual import ResidualDouble

reg = ResidualDouble.create_test_instance()
return [
{"regressor": reg, "calibrator": _IdentityCalibrator()},
{"regressor": reg, "calibrator": _ScaleOnlyCalibrator(scale=1.05)},
]
98 changes: 98 additions & 0 deletions skpro/regression/compose/target_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Implements target transformation pipeline element for probabilistic regression."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

__author__ = ["arnavk23"]
__all__ = ["TargetTransform"]

from skpro.regression.base import BaseProbaRegressor
from skpro.regression.compose._ttr import TransformedTargetRegressor


class TargetTransform(BaseProbaRegressor):
"""TargetTransform pipeline for target variable transformation.

Wraps a regressor and a transformer, applying the transformer to y
during fit and inverse-transforming predictions.
Uses TransformedTargetRegressor internally.

Parameters
----------
regressor : BaseProbaRegressor
The probabilistic regressor to wrap.
transformer : sklearn-like transformer
The transformer to apply to the target variable.

Examples
--------
>>> from skpro.regression.compose import TargetTransform
>>> from skpro.regression.residual import ResidualDouble
>>> from sklearn.preprocessing import StandardScaler, MinMaxScaler
>>> from sklearn.datasets import load_diabetes
>>> from sklearn.model_selection import train_test_split
>>> import pandas as pd
>>> # Load data
>>> X, y = load_diabetes(return_X_y=True, as_frame=True)
>>> y = pd.DataFrame(y)
>>> X_train, X_test, y_train, y_test = train_test_split(X, y)
>>> # Create a probabilistic regressor
>>> reg = ResidualDouble.create_test_instance()
>>> # Use StandardScaler for target transformation
>>> ttr = TargetTransform(regressor=reg, transformer=StandardScaler())
>>> ttr.fit(X_train, y_train)
TargetTransform(...)
>>> y_pred = ttr.predict(X_test)
>>> y_pred_proba = ttr.predict_proba(X_test)
>>> # Use MinMaxScaler for target transformation
>>> ttr2 = TargetTransform(regressor=reg, transformer=MinMaxScaler())
>>> ttr2.fit(X_train, y_train)
TargetTransform(...)
>>> y_pred2 = ttr2.predict(X_test)
"""

_tags = {
"capability:multioutput": True,
"capability:missing": True,
}

def __init__(self, regressor, transformer):
self.regressor = regressor
self.transformer = transformer
self._ttr = TransformedTargetRegressor(
regressor=regressor, transformer=transformer
)
super().__init__()

def _fit(self, X, y, C=None):
self._ttr.fit(X, y, C=C)
return self

def _predict(self, X):
return self._ttr.predict(X)

def _predict_quantiles(self, X, alpha):
return self._ttr.predict_quantiles(X, alpha)

def _predict_interval(self, X, coverage):
return self._ttr.predict_interval(X, coverage)

def _predict_var(self, X):
return self._ttr.predict_var(X)

def _predict_proba(self, X):
return self._ttr.predict_proba(X)

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter sets for automated tests.

Returns two parameter sets: one with StandardScaler, one with MinMaxScaler.
"""
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from skpro.regression.residual import ResidualDouble

reg = ResidualDouble.create_test_instance()
return [
{"regressor": reg, "transformer": StandardScaler()},
{"regressor": reg, "transformer": MinMaxScaler()},
]
Loading
Loading