Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions skpro/regression/residual.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ class ResidualDouble(BaseProbaRegressor):
min_scale : float, default=1e-10
minimum scale parameter. If smaller scale parameter is predicted by
``estimator_resid``, will be clipped to this value
response_lb : float or None, default=None
optional lower bound for target support.
If provided, quantile and interval predictions are clipped below this value.

Attributes
----------
Expand Down Expand Up @@ -136,6 +139,7 @@ def __init__(
use_y_pred=False,
cv=None,
min_scale=1e-10,
response_lb=None,
):
self.estimator = estimator
self.estimator_resid = estimator_resid
Expand All @@ -146,6 +150,7 @@ def __init__(
self.use_y_pred = use_y_pred
self.cv = cv
self.min_scale = min_scale
self.response_lb = response_lb

super().__init__()

Expand Down Expand Up @@ -366,6 +371,15 @@ def _predict_proba(self, X):
y_pred = distr_type(**params)
return y_pred

def _predict_quantiles(self, X, alpha):
    """Compute/return quantile predictions, with optional lower-bound clipping.

    Delegates to the parent implementation and, when ``response_lb`` was
    set at construction, applies an elementwise floor so no predicted
    quantile falls below the configured lower bound of the target support.

    Parameters
    ----------
    X : pandas DataFrame
        feature instances to predict quantiles for
    alpha : list of float
        probability levels at which quantiles are computed

    Returns
    -------
    pandas DataFrame
        quantile predictions, clipped below at ``self.response_lb``
        if that attribute is not ``None``
    """
    preds = super()._predict_quantiles(X=X, alpha=alpha)

    # no bound configured -> pass the parent's predictions through untouched
    if self.response_lb is None:
        return preds

    return preds.clip(lower=self.response_lb)

@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Expand Down Expand Up @@ -405,6 +419,7 @@ def get_test_params(cls, parameter_set="default"):
"distr_type": "t",
"distr_params": {"df": 3},
"cv": KFold(n_splits=3),
"response_lb": 0,
}
params4 = {"estimator": RandomForestRegressor(), "cv": KFold(n_splits=3)}

Expand Down
52 changes: 52 additions & 0 deletions skpro/regression/tests/test_residual.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Tests for residual probabilistic regressors."""

import numpy as np
import pandas as pd
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import TweedieRegressor

from skpro.regression.residual import ResidualDouble


def _make_positive_data(n=200, p=5, seed=42):
"""Create synthetic positive-response data for Tweedie-style models."""
rng = np.random.RandomState(seed)
X = rng.rand(n, p)
beta = rng.uniform(-0.5, 0.8, size=p)
mu = np.exp(X @ beta)

X = pd.DataFrame(X, columns=[f"feat_{i}" for i in range(p)])
y = pd.Series(mu, name="y")
return X, y


def test_residual_double_response_lb_clips_quantiles_and_intervals():
    """Quantile/interval outputs should respect configured lower bound."""
    X, y = _make_positive_data()

    mean_est = TweedieRegressor(power=1.5, link="log")
    resid_est = DummyRegressor(strategy="constant", constant=3.0)
    alphas = [0.05, 0.5, 0.95]

    # reference fit without any bound on the target support
    baseline = ResidualDouble(mean_est, resid_est, min_scale=1e-6)
    baseline.fit(X, y)
    q_base = baseline.predict_quantiles(X, alpha=alphas)

    # same configuration, but with a zero lower bound on predictions
    bounded = ResidualDouble(
        mean_est,
        resid_est,
        min_scale=1e-6,
        response_lb=0.0,
    )
    bounded.fit(X, y)
    q_bounded = bounded.predict_quantiles(X, alpha=alphas)
    iv_bounded = bounded.predict_interval(X, coverage=[0.9])

    # sanity check that the test is meaningful: the unbounded model must
    # actually produce negative lower quantiles, otherwise clipping is a no-op
    assert (q_base < 0).to_numpy().any()

    # clipping must be exactly an elementwise floor at zero
    pd.testing.assert_frame_equal(q_bounded, q_base.clip(lower=0.0))

    # interval endpoints must also honor the lower bound
    assert (iv_bounded >= 0).to_numpy().all()