54 changes: 54 additions & 0 deletions bindings/python/py_src/tokenizers/__init__.pyi
@@ -1614,6 +1614,60 @@ class Tokenizer:
:class:`~tokenizers.Encoding`: The final post-processed encoding
"""
pass
def post_process_tokens(
self,
/,
tokens: list[str],
pair: list[str] | None = None,
add_special_tokens: bool = True,
) -> list[str]:
"""
Post-process a list of tokens (and optionally a pair) and return the processed tokens.

This is a simplified interface that only handles the token strings, without the full
Encoding information. Useful for step-by-step tokenization.

Args:
tokens (:obj:`List[str]`):
The main sequence of tokens

pair (:obj:`List[str]`, `optional`):
An optional pair sequence of tokens

add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
Whether to add special tokens

Returns:
:obj:`List[str]`: A list of tokens with special tokens added according to the post-processor
"""
...
def post_process_ids(
self,
/,
ids: list[int],
pair: list[int] | None = None,
add_special_tokens: bool = True,
) -> list[int]:
"""
Post-process a list of token IDs (and optionally a pair) and return the processed IDs.

This is a simplified interface that only handles the token IDs, without the full
Encoding information. Useful for step-by-step tokenization.

Args:
ids (:obj:`List[int]`):
The main sequence of token IDs

pair (:obj:`List[int]`, `optional`):
An optional pair sequence of token IDs

add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
Whether to add special tokens

Returns:
:obj:`List[int]`: A list of token IDs with special tokens added according to the post-processor
"""
...

@property
def post_processor(self):
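A minimal usage sketch of the two new Tokenizer-level methods, assuming a checkpoint whose post-processor adds BERT-style [CLS]/[SEP] markers (ids here are illustrative; the exact output depends on the post-processor actually configured):

from tokenizers import Tokenizer

tokenizer = Tokenizer.from_pretrained("bert-base-uncased")

# Single sequence: the post-processor wraps the tokens.
print(tokenizer.post_process_tokens(["hello", "world"]))
# expected along the lines of: ['[CLS]', 'hello', 'world', '[SEP]']

# A pair sequence is merged into the main one by the post-processor.
print(tokenizer.post_process_ids([7592, 2088], pair=[2003]))
# expected along the lines of: [101, 7592, 2088, 102, 2003, 102]

# add_special_tokens=False passes the input through unchanged.
print(tokenizer.post_process_tokens(["hello"], add_special_tokens=False))
# expected: ['hello']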
54 changes: 54 additions & 0 deletions bindings/python/py_src/tokenizers/processors/__init__.pyi
@@ -117,6 +117,60 @@ class BertProcessing(PostProcessor):
:class:`~tokenizers.Encoding`: The final encoding
"""
pass
def process_tokens(
self,
/,
tokens: list[str],
pair: list[str] | None = None,
add_special_tokens: bool = True,
) -> list[str]:
"""
Process a list of tokens (and optionally a pair) and return the processed tokens.

This is a simplified interface that only handles the token strings, without the full
Encoding information. Useful for step-by-step tokenization.

Args:
tokens (:obj:`List[str]`):
The main sequence of tokens

pair (:obj:`List[str]`, `optional`):
An optional pair sequence of tokens

add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
Whether to add special tokens

Returns:
:obj:`List[str]`: A list of tokens with special tokens added
"""
...
def process_ids(
self,
/,
ids: list[int],
pair: list[int] | None = None,
add_special_tokens: bool = True,
) -> list[int]:
"""
Process a list of token IDs (and optionally a pair) and return the processed IDs.

This is a simplified interface that only handles the token IDs, without the full
Encoding information. Useful for step-by-step tokenization.

Args:
ids (:obj:`List[int]`):
The main sequence of token IDs

pair (:obj:`List[int]`, `optional`):
An optional pair sequence of token IDs

add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
Whether to add special tokens

Returns:
:obj:`List[int]`: A list of token IDs with special tokens added
"""
...

@property
def sep(self):
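The processor-level methods can also be called directly, without going through a Tokenizer. A sketch using the existing TemplateProcessing processor (illustrative ids; any PostProcessor exposing the new methods would behave analogously):

from tokenizers.processors import TemplateProcessing

proc = TemplateProcessing(
    single="[CLS] $A [SEP]",
    pair="[CLS] $A [SEP] $B:1 [SEP]:1",
    special_tokens=[("[CLS]", 101), ("[SEP]", 102)],
)

print(proc.process_tokens(["hello", "world"]))
# expected: ['[CLS]', 'hello', 'world', '[SEP]']

print(proc.process_ids([7592, 2088], pair=[2003]))
# expected: [101, 7592, 2088, 102, 2003, 102]

print(proc.process_ids([7592], add_special_tokens=False))
# expected: [7592]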
130 changes: 130 additions & 0 deletions bindings/python/src/processors.rs
@@ -94,6 +94,26 @@ impl PostProcessor for PyPostProcessor {
self.processor
.process_encodings(encodings, add_special_tokens)
}

fn process_tokens(
&self,
tokens: Vec<String>,
pair_tokens: Option<Vec<String>>,
add_special_tokens: bool,
) -> tk::Result<Vec<String>> {
self.processor
.process_tokens(tokens, pair_tokens, add_special_tokens)
}

fn process_ids(
&self,
ids: Vec<u32>,
pair_ids: Option<Vec<u32>>,
add_special_tokens: bool,
) -> tk::Result<Vec<u32>> {
self.processor
.process_ids(ids, pair_ids, add_special_tokens)
}
}

#[pymethods]
@@ -165,6 +185,66 @@ impl PyPostProcessor {
Ok(final_encoding.into())
}

/// Process a list of tokens (and optionally a pair) and return the processed tokens.
///
/// This is a simplified interface that only handles the token strings, without the full
/// Encoding information. Useful for step-by-step tokenization.
///
/// Args:
/// tokens (:obj:`List[str]`):
/// The main sequence of tokens
///
/// pair (:obj:`List[str]`, `optional`):
/// An optional pair sequence of tokens
///
/// add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
/// Whether to add special tokens
///
/// Returns:
/// :obj:`List[str]`: A list of tokens with special tokens added
#[pyo3(signature = (tokens, pair = None, add_special_tokens = true))]
#[pyo3(text_signature = "(self, tokens, pair=None, add_special_tokens=True)")]
fn process_tokens(
&self,
tokens: Vec<String>,
pair: Option<Vec<String>>,
add_special_tokens: bool,
) -> PyResult<Vec<String>> {
ToPyResult(
self.processor
.process_tokens(tokens, pair, add_special_tokens),
)
.into()
}

/// Process a list of token IDs (and optionally a pair) and return the processed IDs.
///
/// This is a simplified interface that only handles the token IDs, without the full
/// Encoding information. Useful for step-by-step tokenization.
///
/// Args:
/// ids (:obj:`List[int]`):
/// The main sequence of token IDs
///
/// pair (:obj:`List[int]`, `optional`):
/// An optional pair sequence of token IDs
///
/// add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
/// Whether to add special tokens
///
/// Returns:
/// :obj:`List[int]`: A list of token IDs with special tokens added
#[pyo3(signature = (ids, pair = None, add_special_tokens = true))]
#[pyo3(text_signature = "(self, ids, pair=None, add_special_tokens=True)")]
fn process_ids(
&self,
ids: Vec<u32>,
pair: Option<Vec<u32>>,
add_special_tokens: bool,
) -> PyResult<Vec<u32>> {
ToPyResult(self.processor.process_ids(ids, pair, add_special_tokens)).into()
}

fn __repr__(&self) -> PyResult<String> {
crate::utils::serde_pyo3::repr(self)
.map_err(|e| exceptions::PyException::new_err(e.to_string()))
@@ -258,6 +338,56 @@ impl PostProcessor for PyPostProcessorTypeWrapper {
},
}
}

fn process_tokens(
&self,
mut tokens: Vec<String>,
mut pair_tokens: Option<Vec<String>>,
add_special_tokens: bool,
) -> tk::Result<Vec<String>> {
match self {
PyPostProcessorTypeWrapper::Single(inner) => inner
.read()
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPostProcessor"))?
.process_tokens(tokens, pair_tokens, add_special_tokens),
PyPostProcessorTypeWrapper::Sequence(inner) => {
for processor in inner.iter() {
let result = processor
.read()
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPostProcessor"))?
.process_tokens(tokens, pair_tokens, add_special_tokens)?;
tokens = result;
pair_tokens = None;
}
Ok(tokens)
},
}
}

fn process_ids(
&self,
mut ids: Vec<u32>,
mut pair_ids: Option<Vec<u32>>,
add_special_tokens: bool,
) -> tk::Result<Vec<u32>> {
match self {
PyPostProcessorTypeWrapper::Single(inner) => inner
.read()
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPostProcessor"))?
.process_ids(ids, pair_ids, add_special_tokens),
PyPostProcessorTypeWrapper::Sequence(inner) => {
for processor in inner.iter() {
let result = processor
.read()
.map_err(|_| PyException::new_err("RwLock synchronisation primitive is poisoned, cannot get subtype of PyPostProcessor"))?
.process_ids(ids, pair_ids, add_special_tokens)?;
ids = result;
pair_ids = None;
}
Ok(ids)
},
}
}
}

impl<'de> Deserialize<'de> for PyPostProcessorTypeWrapper {
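The Sequence branches above give the pair argument a specific meaning: it is handed to the first processor only, which merges it into the main sequence, and every subsequent processor then sees a single, already-merged sequence (the pair is reset to None). A sketch of that behaviour, assuming tokenizers.processors.Sequence and illustrative templates:

from tokenizers.processors import Sequence, TemplateProcessing

inner = TemplateProcessing(
    single="$A [SEP]",
    pair="$A [SEP] $B [SEP]",
    special_tokens=[("[SEP]", 102)],
)
outer = TemplateProcessing(
    single="[CLS] $A",
    pair="[CLS] $A $B",
    special_tokens=[("[CLS]", 101)],
)

seq = Sequence([inner, outer])
# inner consumes the pair; outer only prepends [CLS] to the merged result.
print(seq.process_ids([7592], pair=[2003]))
# expected: [101, 7592, 102, 2003, 102]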
64 changes: 64 additions & 0 deletions bindings/python/src/tokenizer.rs
@@ -1735,6 +1735,70 @@ impl PyTokenizer {
.into()
}

/// Post-process a list of tokens (and optionally a pair) and return the processed tokens.
///
/// This is a simplified interface that only handles the token strings, without the full
/// Encoding information. Useful for step-by-step tokenization.
///
/// Args:
/// tokens (:obj:`List[str]`):
/// The main sequence of tokens
///
/// pair (:obj:`List[str]`, `optional`):
/// An optional pair sequence of tokens
///
/// add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
/// Whether to add special tokens
///
/// Returns:
/// :obj:`List[str]`: A list of tokens with special tokens added according to the post-processor
#[pyo3(signature = (tokens, pair=None, add_special_tokens=true))]
#[pyo3(text_signature = "(self, tokens, pair=None, add_special_tokens=True)")]
fn post_process_tokens(
&self,
tokens: Vec<String>,
pair: Option<Vec<String>>,
add_special_tokens: bool,
) -> PyResult<Vec<String>> {
ToPyResult(
self.tokenizer
.post_process_tokens(tokens, pair, add_special_tokens),
)
.into()
}

/// Post-process a list of token IDs (and optionally a pair) and return the processed IDs.
///
/// This is a simplified interface that only handles the token IDs, without the full
/// Encoding information. Useful for step-by-step tokenization.
///
/// Args:
/// ids (:obj:`List[int]`):
/// The main sequence of token IDs
///
/// pair (:obj:`List[int]`, `optional`):
/// An optional pair sequence of token IDs
///
/// add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
/// Whether to add special tokens
///
/// Returns:
/// :obj:`List[int]`: A list of token IDs with special tokens added according to the post-processor
#[pyo3(signature = (ids, pair=None, add_special_tokens=true))]
#[pyo3(text_signature = "(self, ids, pair=None, add_special_tokens=True)")]
fn post_process_ids(
&self,
ids: Vec<u32>,
pair: Option<Vec<u32>>,
add_special_tokens: bool,
) -> PyResult<Vec<u32>> {
ToPyResult(
self.tokenizer
.post_process_ids(ids, pair, add_special_tokens),
)
.into()
}

/// The :class:`~tokenizers.models.Model` in use by the Tokenizer
#[getter]
fn get_model(&self, py: Python<'_>) -> PyResult<Py<PyAny>> {
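Together these hooks support the step-by-step tokenization the docstrings mention. A sketch that decomposes encode() into a model pass plus an explicit post-processing pass, assuming a BERT-style checkpoint:

from tokenizers import Tokenizer

tok = Tokenizer.from_pretrained("bert-base-uncased")
text = "hello world"

# 1. Run the pipeline without post-processing.
raw = tok.encode(text, add_special_tokens=False)

# 2. Apply the post-processor to tokens and ids separately.
tokens = tok.post_process_tokens(raw.tokens)
ids = tok.post_process_ids(raw.ids)

# 3. The result should match the one-shot path.
full = tok.encode(text)
assert tokens == full.tokens
assert ids == full.ids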