def _predict_quantiles(self, X, alpha):
    """Return quantile predictions, floored at ``response_lb`` when set.

    Delegates to the parent implementation, then — if ``self.response_lb``
    is not ``None`` — raises any smaller predicted quantiles up to that
    bound, so the output never falls below the configured target support.

    Parameters
    ----------
    X : pandas DataFrame
        feature instances to predict quantiles for
    alpha : list of float
        quantile points to predict

    Returns
    -------
    pandas DataFrame
        quantile predictions, element-wise clipped from below at
        ``self.response_lb`` when that attribute is not ``None``

    Notes
    -----
    NOTE(review): only the quantile path is clipped here; interval
    predictions are bounded only if the base class routes them through
    ``_predict_quantiles`` — confirm against ``BaseProbaRegressor``.
    """
    preds = super()._predict_quantiles(X=X, alpha=alpha)
    if self.response_lb is None:
        return preds
    # DataFrame.clip(lower=...) floors every entry at the bound
    return preds.clip(lower=self.response_lb)
"""Tests for residual probabilistic regressors."""

import numpy as np
import pandas as pd
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import TweedieRegressor

from skpro.regression.residual import ResidualDouble


def _make_positive_data(n=200, p=5, seed=42):
    """Generate strictly positive regression data for log-link models.

    Builds ``y = exp(X @ beta)`` from uniform random features and random
    coefficients, so the target is positive — the setting where a lower
    bound of 0 on predictive quantiles is meaningful.

    Parameters
    ----------
    n : int, default=200
        number of samples
    p : int, default=5
        number of features
    seed : int, default=42
        seed for the random number generator

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        feature frame with columns ``feat_0 .. feat_{p-1}`` and positive target
    """
    rng = np.random.RandomState(seed)
    features = rng.rand(n, p)
    coefs = rng.uniform(-0.5, 0.8, size=p)
    target = np.exp(features @ coefs)

    frame = pd.DataFrame(features, columns=[f"feat_{i}" for i in range(p)])
    series = pd.Series(target, name="y")
    return frame, series


def test_residual_double_response_lb_clips_quantiles_and_intervals():
    """Quantile/interval outputs should respect configured lower bound."""
    X, y = _make_positive_data()

    mean_est = TweedieRegressor(power=1.5, link="log")
    # constant, deliberately inflated scale so low quantiles dip below zero
    resid_est = DummyRegressor(strategy="constant", constant=3.0)
    alphas = [0.05, 0.5, 0.95]

    # reference model without a lower bound
    plain = ResidualDouble(mean_est, resid_est, min_scale=1e-6)
    plain.fit(X, y)
    q_plain = plain.predict_quantiles(X, alpha=alphas)

    # same model, but with the response floored at zero
    bounded = ResidualDouble(
        mean_est,
        resid_est,
        min_scale=1e-6,
        response_lb=0.0,
    )
    bounded.fit(X, y)
    q_bounded = bounded.predict_quantiles(X, alpha=alphas)
    iv_bounded = bounded.predict_interval(X, coverage=[0.9])

    # sanity: without clipping, some lower quantiles must be negative,
    # otherwise the clipping assertions below would be vacuous
    assert (q_plain < 0).to_numpy().any()

    # clipping is exact: bounded output equals the plain output floored at 0
    pd.testing.assert_frame_equal(q_bounded, q_plain.clip(lower=0.0))

    # interval lower/upper bounds inherit the floor when response_lb=0
    assert (iv_bounded >= 0).to_numpy().all()