diff --git a/feature_engine/encoding/one_hot.py b/feature_engine/encoding/one_hot.py
index e94432a3d..9ab36dc93 100644
--- a/feature_engine/encoding/one_hot.py
+++ b/feature_engine/encoding/one_hot.py
@@ -1,6 +1,7 @@
 # Authors: Soledad Galli <solegalli@protonmail.com>
 # License: BSD 3 clause
 
+import warnings
 from typing import List, Optional, Union
 
 import numpy as np
@@ -94,6 +95,19 @@ class OneHotEncoder(CategoricalMethodsMixin, CategoricalInitMixin):
         to `True`, will ensure that for every binary variable in the dataset, only 1
         dummy is created.
 
+    drop: str, default=None
+        Controls which category to drop when creating k-1 dummy variables. Only used
+        if `top_categories` is None. If `drop` is not None and `drop_last` is also
+        True, a `FutureWarning` is raised and `drop` takes precedence.
+
+        - ``None``: `drop` is not applied; `drop_last` decides (k dummies if False).
+        - ``'last'``: Drops the last category in alphabetical order. By contrast,
+          ``drop_last`` drops the last category in order of appearance.
+        - ``'first'``: Drops the first category in alphabetical order.
+        - ``'most_frequent'``: Drops the most frequent category found during ``fit()``.
+          If there is a tie, a ``UserWarning`` is raised and the first
+          category alphabetically among the tied categories is dropped.
+
     {variables}
 
     {ignore_format}
@@ -162,6 +176,7 @@ def __init__(
         top_categories: Optional[int] = None,
         drop_last: bool = False,
         drop_last_binary: bool = False,
+        drop: Optional[str] = None,
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         ignore_format: bool = False,
     ) -> None:
@@ -185,10 +200,26 @@ def __init__(
                 f"Got {drop_last_binary} instead."
             )
 
+        if drop is not None and drop not in ("last", "first", "most_frequent"):
+            raise ValueError(
+                "drop takes only values None, 'last', 'first', or "
+                f"'most_frequent'. Got {drop} instead."
+            )
+
+        if drop is not None and drop_last is True:
+            warnings.warn(
+                "Both `drop_last` and `drop` were set. `drop_last` is deprecated "
+                "when used together with `drop`. `drop` will take precedence. "
+                "In future versions, `drop_last` will be removed.",
+                FutureWarning,
+                stacklevel=2,
+            )
+
         super().__init__(variables, ignore_format)
         self.top_categories = top_categories
         self.drop_last = drop_last
         self.drop_last_binary = drop_last_binary
+        self.drop = drop
 
     def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         """
@@ -230,8 +261,41 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
             else:
                 category_ls = list(X[var].unique())
 
-                # return k-1 dummies
-                if self.drop_last:
+                if self.drop is not None:
+                    sorted_cats = sorted(category_ls)
+
+                    if self.drop == "last":
+                        self.encoder_dict_[var] = sorted_cats[:-1]
+
+                    elif self.drop == "first":
+                        self.encoder_dict_[var] = sorted_cats[1:]
+
+                    elif self.drop == "most_frequent":
+                        freq = X[var].value_counts()
+                        max_freq = freq.iloc[0]
+                        most_frequent_cats = freq[
+                            freq == max_freq
+                        ].index.tolist()
+
+                        if len(most_frequent_cats) > 1:
+                            cat_to_drop = sorted(most_frequent_cats)[0]
+                            warnings.warn(
+                                f"Variable '{var}': multiple categories "
+                                f"share the highest frequency ({max_freq}). "
+                                f"Dropping '{cat_to_drop}' (first "
+                                f"alphabetically among ties).",
+                                UserWarning,
+                                stacklevel=2,
+                            )
+                        else:
+                            cat_to_drop = most_frequent_cats[0]
+
+                        self.encoder_dict_[var] = [
+                            c for c in category_ls if c != cat_to_drop
+                        ]
+
+                # Legacy path: drop_last (backward compatible)
+                elif self.drop_last:
                     self.encoder_dict_[var] = category_ls[:-1]
 
                 # return k dummies
diff --git a/feature_engine/selection/probe_feature_selection.py b/feature_engine/selection/probe_feature_selection.py
index ec112b3e4..520d3ecd8 100644
--- a/feature_engine/selection/probe_feature_selection.py
+++ b/feature_engine/selection/probe_feature_selection.py
@@ -87,6 +87,13 @@ class ProbeFeatureSelection(BaseSelector):
 
     {variables}
 
+    variables_discrete: list, default=None
+        A list of discrete variables. They must also be among `variables`. If None,
+        the importance of every variable is compared to that of all probe features.
+        If passed, discrete variables are compared to the discrete probe features
+        and the remaining variables to the continuous probe features; in that case,
+        `distribution` must generate both discrete and continuous probes.
+
     collective: bool, default=True
          Whether the feature importance should be derived from an estimator trained on
          the entire dataset (True), or trained using individual features (False).
@@ -140,6 +147,9 @@ class ProbeFeatureSelection(BaseSelector):
 
     {variables_}
 
+    variables_discrete_:
+        A list of discrete variables to be compared with discrete probes, or None.
+
     {feature_names_in_}
 
     {n_features_in_}
@@ -183,6 +193,7 @@ def __init__(
         self,
         estimator,
         variables: Variables = None,
+        variables_discrete: Variables = None,
         collective: bool = True,
         scoring: str = "roc_auc",
         n_probes: int = 1,
@@ -244,6 +255,7 @@ def __init__(
         super().__init__(confirm_variables)
         self.estimator = estimator
         self.variables = variables
+        self.variables_discrete = variables_discrete
         self.collective = collective
         self.scoring = scoring
         self.distribution = distribution
@@ -272,6 +284,19 @@ def fit(self, X: pd.DataFrame, y: pd.Series):
             X, self.variables, self.confirm_variables
         )
 
+        if self.variables_discrete is not None:
+            self.variables_discrete_ = _select_numerical_variables(
+                X, self.variables_discrete, self.confirm_variables
+            )
+            for var in self.variables_discrete_:
+                if var not in self.variables_:
+                    raise ValueError(
+                        f"Variable {var} is present in variables_discrete "
+                        f"but not in variables."
+                    )
+        else:
+            self.variables_discrete_ = None
+
         # save input features
         self._get_feature_names_in(X)
 
@@ -360,39 +385,58 @@ def _get_features_to_drop(self):
         Identify the variables that have a lower feature importance than the average
         feature importance of all the probe features.
         """
+        features_to_drop = []
+
+        if self.variables_discrete_ is None:
+            threshold = self._get_threshold(self.probe_features_.columns)
+            for var in self.variables_:
+                if self.feature_importances_[var] < threshold:
+                    features_to_drop.append(var)
+        else:
+            discrete_probes = [
+                c
+                for c in self.probe_features_.columns
+                if "binary" in c or "discrete_uniform" in c or "poisson" in c
+            ]
+            continuous_probes = [
+                c
+                for c in self.probe_features_.columns
+                if "gaussian" in c or ("uniform" in c and "discrete_uniform" not in c)
+            ]
+
+            threshold_discrete = self._get_threshold(discrete_probes)
+            threshold_continuous = self._get_threshold(continuous_probes)
+
+            for var in self.variables_:
+                if var in self.variables_discrete_:
+                    if self.feature_importances_[var] < threshold_discrete:
+                        features_to_drop.append(var)
+                else:
+                    if self.feature_importances_[var] < threshold_continuous:
+                        features_to_drop.append(var)
+
+        return features_to_drop
 
-        # if more than 1 probe feature, calculate threshold based on
-        # probe feature importance.
-        if self.probe_features_.shape[1] > 1:
+    def _get_threshold(self, probes):
+        if not len(probes):
+            raise ValueError(
+                "The selected distribution does not generate the required probes. "
+                "For example, if you set variables_discrete, you need to generate "
+                "both continuous and discrete probes."
+            )
+        if len(probes) > 1:
             if self.threshold == "mean":
-                threshold = self.feature_importances_[
-                    self.probe_features_.columns
-                ].values.mean()
+                threshold = self.feature_importances_[probes].values.mean()
             elif self.threshold == "max":
-                threshold = self.feature_importances_[
-                    self.probe_features_.columns
-                ].values.max()
+                threshold = self.feature_importances_[probes].values.max()
             else:
                 threshold = (
-                    self.feature_importances_[
-                        self.probe_features_.columns
-                    ].values.mean()
-                    + 3
-                    * self.feature_importances_[
-                        self.probe_features_.columns
-                    ].values.std()
+                    self.feature_importances_[probes].values.mean()
+                    + 3 * self.feature_importances_[probes].values.std()
                 )
-
         else:
-            threshold = self.feature_importances_[self.probe_features_.columns].values
-
-        features_to_drop = []
-
-        for var in self.variables_:
-            if self.feature_importances_[var] < threshold:
-                features_to_drop.append(var)
-
-        return features_to_drop
+            threshold = self.feature_importances_[probes].values[0]
+        return threshold
 
     def _more_tags(self):
         tags_dict = _return_tags()
diff --git a/tests/test_encoding/test_onehot_encoder.py b/tests/test_encoding/test_onehot_encoder.py
index aca3448be..ac2c8ad0b 100644
--- a/tests/test_encoding/test_onehot_encoder.py
+++ b/tests/test_encoding/test_onehot_encoder.py
@@ -534,3 +534,206 @@ def test_inverse_transform_raises_not_implemented_error(df_enc_binary):
     enc = OneHotEncoder().fit(df_enc_binary)
     with pytest.raises(NotImplementedError):
         enc.inverse_transform(df_enc_binary)
+
+
+# ===========================================================================
+# Tests for the new `drop` parameter (Issue #913)
+# ===========================================================================
+
+
+@pytest.fixture(scope="module")
+def df_drop():
+    """DataFrame with known categories for testing drop strategies."""
+    df = pd.DataFrame(
+        {
+            "x1": ["c", "a", "b", "a", "c", "b", "a"],
+            "x2": ["z", "y", "z", "x", "y", "z", "x"],
+            "num": [1, 2, 3, 4, 5, 6, 7],
+        }
+    )
+    return df
+
+
+def test_drop_last_alphabetically(df_drop):
+    """drop='last' should drop the last category in sorted order."""
+    encoder = OneHotEncoder(drop="last")
+    encoder.fit(df_drop)
+
+    # x1 categories sorted: ['a', 'b', 'c'] -> drop 'c'
+    assert encoder.encoder_dict_["x1"] == ["a", "b"]
+    # x2 categories sorted: ['x', 'y', 'z'] -> drop 'z'
+    assert encoder.encoder_dict_["x2"] == ["x", "y"]
+
+    X = encoder.transform(df_drop)
+    assert "x1_c" not in X.columns
+    assert "x2_z" not in X.columns
+    assert "x1_a" in X.columns
+    assert "x1_b" in X.columns
+    assert "x2_x" in X.columns
+    assert "x2_y" in X.columns
+
+
+def test_drop_first_alphabetically(df_drop):
+    """drop='first' should drop the first category in sorted order."""
+    encoder = OneHotEncoder(drop="first")
+    encoder.fit(df_drop)
+
+    # x1 categories sorted: ['a', 'b', 'c'] -> drop 'a'
+    assert encoder.encoder_dict_["x1"] == ["b", "c"]
+    # x2 categories sorted: ['x', 'y', 'z'] -> drop 'x'
+    assert encoder.encoder_dict_["x2"] == ["y", "z"]
+
+    X = encoder.transform(df_drop)
+    assert "x1_a" not in X.columns
+    assert "x2_x" not in X.columns
+    assert "x1_b" in X.columns
+    assert "x1_c" in X.columns
+    assert "x2_y" in X.columns
+    assert "x2_z" in X.columns
+
+
+def test_drop_most_frequent():
+    """drop='most_frequent' should drop the most common category."""
+    df = pd.DataFrame(
+        {
+            "x1": ["a"] * 10 + ["b"] * 5 + ["c"] * 3,
+        }
+    )
+
+    encoder = OneHotEncoder(drop="most_frequent")
+    encoder.fit(df)
+
+    # 'a' is most frequent (10 times) -> drop 'a'
+    assert "a" not in encoder.encoder_dict_["x1"]
+    assert "b" in encoder.encoder_dict_["x1"]
+    assert "c" in encoder.encoder_dict_["x1"]
+
+    X = encoder.transform(df)
+    assert "x1_a" not in X.columns
+    assert "x1_b" in X.columns
+    assert "x1_c" in X.columns
+
+
+def test_drop_most_frequent_with_tie():
+    """When multiple categories tie for most frequent, warn and drop first alpha."""
+    df = pd.DataFrame(
+        {
+            "x1": ["c"] * 5 + ["a"] * 5 + ["b"] * 3,
+        }
+    )
+
+    with pytest.warns(UserWarning, match="multiple categories share the highest"):
+        encoder = OneHotEncoder(drop="most_frequent")
+        encoder.fit(df)
+
+    # 'a' and 'c' both have frequency 5 — drop 'a' (first alphabetically)
+    assert "a" not in encoder.encoder_dict_["x1"]
+    assert "b" in encoder.encoder_dict_["x1"]
+    assert "c" in encoder.encoder_dict_["x1"]
+
+
+def test_drop_ignored_when_top_categories_set():
+    """top_categories should take precedence over drop."""
+    df = pd.DataFrame(
+        {
+            "x1": ["a"] * 10 + ["b"] * 5 + ["c"] * 3 + ["d"] * 1,
+        }
+    )
+
+    encoder = OneHotEncoder(top_categories=2, drop="first")
+    encoder.fit(df)
+
+    # top_categories=2 should pick the 2 most frequent: ['a', 'b']
+    assert encoder.encoder_dict_["x1"] == ["a", "b"]
+
+
+def test_drop_overrides_drop_last():
+    """When both drop and drop_last are set, drop wins and FutureWarning is raised."""
+    df = pd.DataFrame(
+        {
+            "x1": ["c", "a", "b", "a", "c", "b", "a"],
+        }
+    )
+
+    with pytest.warns(FutureWarning, match="drop_last.*deprecated"):
+        encoder = OneHotEncoder(drop_last=True, drop="first")
+
+    encoder.fit(df)
+
+    # drop="first" should drop 'a' (sorted: ['a', 'b', 'c'])
+    assert encoder.encoder_dict_["x1"] == ["b", "c"]
+
+
+def test_drop_with_drop_last_binary():
+    """drop and drop_last_binary should work together correctly."""
+    df = pd.DataFrame(
+        {
+            "x1": ["a"] * 10 + ["b"] * 5 + ["c"] * 3,
+            "x2": ["yes"] * 10 + ["no"] * 8,  # binary variable
+        }
+    )
+
+    encoder = OneHotEncoder(drop="first", drop_last_binary=True)
+    encoder.fit(df)
+
+    # x1: sorted ['a', 'b', 'c'] -> drop 'a'
+    assert encoder.encoder_dict_["x1"] == ["b", "c"]
+
+    # x2: binary -> drop_last_binary overrides to keep only the first unique
+    assert len(encoder.encoder_dict_["x2"]) == 1
+
+
+@pytest.mark.parametrize(
+    "drop_value", ["empanada", "middle", 123, True, ["last"]]
+)
+def test_error_if_drop_not_valid_string(drop_value):
+    """Invalid drop values should raise ValueError."""
+    with pytest.raises(ValueError, match="drop takes only values"):
+        OneHotEncoder(drop=drop_value)
+
+
+def test_get_feature_names_out_with_drop(df_enc_binary):
+    """get_feature_names_out should reflect the dropped category."""
+    original_features = ["var_num"]
+    input_features = df_enc_binary.columns
+
+    # drop="first": sorted cats for var_A are ['A','B','C'] -> drop 'A'
+    tr = OneHotEncoder(drop="first")
+    tr.fit(df_enc_binary)
+
+    out = [
+        "var_A_B",
+        "var_A_C",
+        "var_B_B",
+        "var_B_C",
+        "var_C_UHU",
+        "var_D_OHO",
+    ]
+    feat_out = original_features + out
+    assert tr.get_feature_names_out(input_features=None) == feat_out
+    assert tr.get_feature_names_out(input_features=input_features) == feat_out
+
+
+def test_drop_none_produces_k_dummies(df_drop):
+    """drop=None (default) should produce k dummies, same as drop_last=False."""
+    encoder = OneHotEncoder(drop=None, drop_last=False)
+    encoder.fit(df_drop)
+
+    # x1 has 3 unique categories -> 3 dummies
+    assert len(encoder.encoder_dict_["x1"]) == 3
+    # x2 has 3 unique categories -> 3 dummies
+    assert len(encoder.encoder_dict_["x2"]) == 3
+
+
+def test_drop_last_backward_compatible(df_drop):
+    """Existing drop_last=True without drop should behave exactly as before."""
+    encoder = OneHotEncoder(drop_last=True)
+    encoder.fit(df_drop)
+
+    # Original behavior: category_ls = list(unique()), drop last element
+    # This preserves insertion order, NOT sorted order
+    x1_unique = list(df_drop["x1"].unique())
+    assert encoder.encoder_dict_["x1"] == x1_unique[:-1]
+
+    x2_unique = list(df_drop["x2"].unique())
+    assert encoder.encoder_dict_["x2"] == x2_unique[:-1]
diff --git a/tests/test_selection/test_probe_feature_selection.py b/tests/test_selection/test_probe_feature_selection.py
index 58d489122..ab2b9b32e 100644
--- a/tests/test_selection/test_probe_feature_selection.py
+++ b/tests/test_selection/test_probe_feature_selection.py
@@ -8,22 +8,30 @@
 from feature_engine.selection import ProbeFeatureSelection
 
 _input_params = [
-    (RandomForestClassifier(), "precision", "all", 3, 3, 6, 4),
-    (Lasso(), "neg_mean_squared_error", "binary", 7, 7, 4, 100),
-    (LogisticRegression(), "roc_auc", "normal", 5, 5, 2, 73),
-    (DecisionTreeRegressor(), "r2", "uniform", 4, 4, 10, 84),
-    (DecisionTreeRegressor(), "r2", "discrete_uniform", 4, 4, 10, 84),
-    (DecisionTreeRegressor(), "r2", "poisson", 4, 4, 10, 84),
-    (RandomForestClassifier(), "precision", ["binary", "uniform"], 3, 3, 6, 4),
+    (RandomForestClassifier(), "precision", "all", 3, 3, 6, 4, None),
+    (Lasso(), "neg_mean_squared_error", "binary", 7, 7, 4, 100, ["var1"]),
+    (LogisticRegression(), "roc_auc", "normal", 5, 5, 2, 73, None),
+    (DecisionTreeRegressor(), "r2", "uniform", 4, 4, 10, 84, ["var2"]),
+    (DecisionTreeRegressor(), "r2", "discrete_uniform", 4, 4, 10, 84, None),
+    (DecisionTreeRegressor(), "r2", "poisson", 4, 4, 10, 84, ["var1", "var2"]),
+    (RandomForestClassifier(), "precision", ["binary", "uniform"], 3, 3, 6, 4, None),
 ]
 
 
 @pytest.mark.parametrize(
-    "_estimator, _scoring, _distribution, _n_cat, _cv, _n_probes, _random_state",
+    "_estimator, _scoring, _distribution, _n_cat, _cv, _n_probes, "
+    "_random_state, _variables_discrete",
     _input_params,
 )
 def test_input_params_assignment(
-    _estimator, _scoring, _distribution, _n_cat, _cv, _n_probes, _random_state
+    _estimator,
+    _scoring,
+    _distribution,
+    _n_cat,
+    _cv,
+    _n_probes,
+    _random_state,
+    _variables_discrete,
 ):
     sel = ProbeFeatureSelection(
         estimator=_estimator,
@@ -33,6 +41,7 @@ def test_input_params_assignment(
         cv=_cv,
         n_probes=_n_probes,
         random_state=_random_state,
+        variables_discrete=_variables_discrete,
     )
 
     assert sel.estimator == _estimator
@@ -42,6 +51,7 @@ def test_input_params_assignment(
     assert sel.cv == _cv
     assert sel.n_probes == _n_probes
     assert sel.random_state == _random_state
+    assert sel.variables_discrete == _variables_discrete
 
 
 @pytest.mark.parametrize("collective", [True, False])
@@ -349,6 +359,7 @@ def test_get_features_to_drop_with_one_probe(thresh):
     )
     sel.probe_features_ = pd.DataFrame({"probe": [1, 1, 1, 1, 1]})
     sel.variables_ = ["var1", "var2", "var3"]
+    sel.variables_discrete_ = None
     assert sel._get_features_to_drop() == ["var3"]
 
 
@@ -374,9 +385,78 @@ def test_get_features_to_drop_with_many_probes(thresh, vars_to_drop):
         {"probe1": [1, 1, 1, 1, 1], "probe2": [1, 1, 1, 1, 1]}
     )
     sel.variables_ = ["var1", "var2", "var3", "var4"]
+    sel.variables_discrete_ = None
     assert sel._get_features_to_drop() == vars_to_drop
 
 
+def test_variables_discrete_raises_error_when_not_in_variables(df_test):
+    X, y = df_test
+
+    sel = ProbeFeatureSelection(
+        estimator=DecisionTreeClassifier(),
+        variables=["var_0", "var_1"],
+        variables_discrete=["var_2"],
+    )
+    msg = "Variable var_2 is present in variables_discrete but not in variables."
+    with pytest.raises(ValueError, match=msg):
+        sel.fit(X, y)
+
+
+def test_variables_discrete_raises_error_when_no_probes_generated(df_test):
+    X, y = df_test
+
+    sel = ProbeFeatureSelection(
+        estimator=DecisionTreeClassifier(),
+        variables=["var_0", "var_1"],
+        variables_discrete=["var_1"],
+        distribution="normal",  # only generates continuous probes
+    )
+    msg = "The selected distribution does not generate the required probes.*"
+    with pytest.raises(ValueError, match=msg):
+        sel.fit(X, y)
+
+    sel = ProbeFeatureSelection(
+        estimator=DecisionTreeClassifier(),
+        variables=["var_0", "var_1"],
+        variables_discrete=["var_1"],
+        distribution="binary",  # only generates discrete probes
+    )
+    with pytest.raises(ValueError, match=msg):
+        sel.fit(X, y)
+
+
+def test_variables_discrete_functionality():
+    sel = ProbeFeatureSelection(
+        estimator=LogisticRegression(),
+        n_probes=2,
+    )
+    sel.feature_importances_ = pd.Series(
+        [11, 20, 9.9, 8.7, 10, 8, 9, 7],
+        index=[
+            "var1",
+            "var2",
+            "var3",
+            "var4",
+            "gaussian_probe_0",
+            "gaussian_probe_1",
+            "binary_probe_0",
+            "binary_probe_1",
+        ],
+    )
+    sel.probe_features_ = pd.DataFrame(
+        {
+            "gaussian_probe_0": [1],
+            "gaussian_probe_1": [1],
+            "binary_probe_0": [1],
+            "binary_probe_1": [1],
+        }
+    )
+    sel.variables_ = ["var1", "var2", "var3", "var4"]
+    sel.variables_discrete_ = ["var3"]
+
+    assert sel._get_features_to_drop() == ["var4"]
+
+
 def test_cv_generator(df_test):
     X, y = df_test
     cv = StratifiedKFold(n_splits=3)