From f1ad3e7e82bdec7258497168ed89a11e47d4e80f Mon Sep 17 00:00:00 2001 From: TimpiaAI Date: Sun, 4 Jan 2026 01:35:51 +0200 Subject: [PATCH] feat: add scorer parameter to partial_ratio for Levenshtein support Add a scorer parameter to partial_ratio and downstream functions that allows choosing between indel (default) and levenshtein. - Default is indel for backward compatibility - C++ uses optimized path for indel, falls back to Python for levenshtein - Adds 10 new tests for the scorer parameter Closes rapidfuzz/RapidFuzz#401 --- src/rapidfuzz/fuzz.pyi | 15 +++++- src/rapidfuzz/fuzz_cpp.pyx | 36 +++++++++++--- src/rapidfuzz/fuzz_py.py | 71 +++++++++++++++++++++++----- tests/test_fuzz.py | 96 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 200 insertions(+), 18 deletions(-) diff --git a/src/rapidfuzz/fuzz.pyi b/src/rapidfuzz/fuzz.pyi index 0eb38bb8..9f9e4097 100644 --- a/src/rapidfuzz/fuzz.pyi +++ b/src/rapidfuzz/fuzz.pyi @@ -4,12 +4,13 @@ from __future__ import annotations from collections.abc import Hashable, Sequence -from typing import Callable, TypeVar, overload +from typing import Callable, Literal, TypeVar, overload from rapidfuzz.distance import ScoreAlignment _UnprocessedType1 = TypeVar("_UnprocessedType1") _UnprocessedType2 = TypeVar("_UnprocessedType2") +_ScorerType = Literal["indel", "levenshtein"] @overload def ratio( @@ -34,6 +35,7 @@ def partial_ratio( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_ratio( @@ -42,6 +44,7 @@ def partial_ratio( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_ratio_alignment( @@ -50,6 +53,7 @@ def partial_ratio_alignment( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> ScoreAlignment | None: ... 
@overload def partial_ratio_alignment( @@ -58,6 +62,7 @@ def partial_ratio_alignment( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> ScoreAlignment | None: ... @overload def token_sort_ratio( @@ -114,6 +119,7 @@ def partial_token_sort_ratio( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_token_sort_ratio( @@ -122,6 +128,7 @@ def partial_token_sort_ratio( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_token_set_ratio( @@ -130,6 +137,7 @@ def partial_token_set_ratio( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_token_set_ratio( @@ -138,6 +146,7 @@ def partial_token_set_ratio( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_token_ratio( @@ -146,6 +155,7 @@ def partial_token_ratio( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def partial_token_ratio( @@ -154,6 +164,7 @@ def partial_token_ratio( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def WRatio( @@ -162,6 +173,7 @@ def WRatio( *, processor: None = None, score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... @overload def WRatio( @@ -170,6 +182,7 @@ def WRatio( *, processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]], score_cutoff: float | None = 0, + scorer: _ScorerType = "indel", ) -> float: ... 
@overload def QRatio( diff --git a/src/rapidfuzz/fuzz_cpp.pyx b/src/rapidfuzz/fuzz_cpp.pyx index eddad3a7..bed635cc 100644 --- a/src/rapidfuzz/fuzz_cpp.pyx +++ b/src/rapidfuzz/fuzz_cpp.pyx @@ -94,7 +94,7 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None): return ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff) -def partial_ratio(s1, s2, *, processor=None, score_cutoff=None): +def partial_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -102,11 +102,15 @@ def partial_ratio(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0 + # For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.partial_ratio(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return partial_ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff) -def partial_ratio_alignment(s1, s2, *, processor=None, score_cutoff=None): +def partial_ratio_alignment(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -114,6 +118,10 @@ def partial_ratio_alignment(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return None + # For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.partial_ratio_alignment(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) res = partial_ratio_alignment_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -159,7 +167,7 @@ def token_ratio(s1, s2, *, processor=None, score_cutoff=None): return token_ratio_func(s1_proc.string, s2_proc.string, 
c_score_cutoff) -def partial_token_sort_ratio(s1, s2, *, processor=None, score_cutoff=None): +def partial_token_sort_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -167,11 +175,15 @@ def partial_token_sort_ratio(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0 + # For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.partial_token_sort_ratio(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return partial_token_sort_ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff) -def partial_token_set_ratio(s1, s2, *, processor=None, score_cutoff=None): +def partial_token_set_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -179,11 +191,15 @@ def partial_token_set_ratio(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0 + # For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.partial_token_set_ratio(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return partial_token_set_ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff) -def partial_token_ratio(s1, s2, *, processor=None, score_cutoff=None): +def partial_token_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -191,11 +207,15 @@ def partial_token_ratio(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0 + # 
For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.partial_token_ratio(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return partial_token_ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff) -def WRatio(s1, s2, *, processor=None, score_cutoff=None): +def WRatio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"): cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff cdef RF_StringWrapper s1_proc, s2_proc @@ -203,6 +223,10 @@ def WRatio(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0 + # For non-indel scorers, fall back to Python implementation + if scorer is not None and scorer != "indel": + return fuzz_py.WRatio(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) + preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return WRatio_func(s1_proc.string, s2_proc.string, c_score_cutoff) diff --git a/src/rapidfuzz/fuzz_py.py b/src/rapidfuzz/fuzz_py.py index 69cfee65..723890bb 100644 --- a/src/rapidfuzz/fuzz_py.py +++ b/src/rapidfuzz/fuzz_py.py @@ -12,6 +12,27 @@ distance as indel_distance, normalized_similarity as indel_normalized_similarity, ) +from rapidfuzz.distance.Levenshtein_py import ( + normalized_similarity as levenshtein_normalized_similarity, +) + + +def _levenshtein_block_normalized_similarity(block, s1, s2, score_cutoff=None): + """ + Fallback block normalized similarity for Levenshtein. + Note: This doesn't use the block optimization, it's a simple wrapper. 
+ """ + return levenshtein_normalized_similarity(s1, s2, score_cutoff=score_cutoff) + + +def _get_scorer_func(scorer): + """Get the appropriate block normalized similarity function for the given scorer type.""" + if scorer is None or scorer == "indel": + return indel_block_normalized_similarity + elif scorer == "levenshtein": + return _levenshtein_block_normalized_similarity + else: + raise ValueError(f"Unknown scorer: {scorer!r}. Valid options are 'indel' or 'levenshtein'.") def get_scorer_flags_fuzz(**_kwargs): @@ -113,7 +134,7 @@ def ratio( return score * 100 -def _partial_ratio_impl(s1, s2, score_cutoff): +def _partial_ratio_impl(s1, s2, score_cutoff, scorer_func): """ implementation of partial_ratio. This assumes len(s1) <= len(s2). """ @@ -136,7 +157,7 @@ def _partial_ratio_impl(s1, s2, score_cutoff): continue # todo cache map - ls_ratio = indel_block_normalized_similarity(block, s1, s2[:i], score_cutoff=score_cutoff) + ls_ratio = scorer_func(block, s1, s2[:i], score_cutoff=score_cutoff) if ls_ratio > res.score: res.score = score_cutoff = ls_ratio res.dest_start = 0 @@ -151,7 +172,7 @@ def _partial_ratio_impl(s1, s2, score_cutoff): continue # todo cache map - ls_ratio = indel_block_normalized_similarity(block, s1, s2[i : i + len1], score_cutoff=score_cutoff) + ls_ratio = scorer_func(block, s1, s2[i : i + len1], score_cutoff=score_cutoff) if ls_ratio > res.score: res.score = score_cutoff = ls_ratio res.dest_start = i @@ -166,7 +187,7 @@ def _partial_ratio_impl(s1, s2, score_cutoff): continue # todo cache map - ls_ratio = indel_block_normalized_similarity(block, s1, s2[i:], score_cutoff=score_cutoff) + ls_ratio = scorer_func(block, s1, s2[i:], score_cutoff=score_cutoff) if ls_ratio > res.score: res.score = score_cutoff = ls_ratio res.dest_start = i @@ -185,6 +206,7 @@ def partial_ratio( *, processor=None, score_cutoff=None, + scorer="indel", ): """ Searches for the optimal alignment of the shorter string in the @@ -203,6 +225,10 @@ def partial_ratio( Optional 
argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". + - "indel": Uses Indel distance (substitution costs 2, i.e., delete + insert) + - "levenshtein": Uses Levenshtein distance (substitution costs 1) Returns ------- @@ -249,8 +275,10 @@ def partial_ratio( -------- >>> fuzz.partial_ratio("this is a test", "this is a test!") 100.0 + >>> fuzz.partial_ratio("abc", "abd", scorer="levenshtein") + 66.66666666666667 """ - alignment = partial_ratio_alignment(s1, s2, processor=processor, score_cutoff=score_cutoff) + alignment = partial_ratio_alignment(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer) if alignment is None: return 0 @@ -263,6 +291,7 @@ def partial_ratio_alignment( *, processor=None, score_cutoff=None, + scorer="indel", ): """ Searches for the optimal alignment of the shorter string in the @@ -282,6 +311,10 @@ def partial_ratio_alignment( Optional argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff None is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". 
+ - "indel": Uses Indel distance (substitution costs 2, i.e., delete + insert) + - "levenshtein": Uses Levenshtein distance (substitution costs 1) Returns ------- @@ -312,6 +345,8 @@ def partial_ratio_alignment( if score_cutoff is None: score_cutoff = 0 + scorer_func = _get_scorer_func(scorer) + if not s1 and not s2: return ScoreAlignment(100.0, 0, 0, 0, 0) s1, s2 = conv_sequences(s1, s2) @@ -324,10 +359,10 @@ def partial_ratio_alignment( shorter = s2 longer = s1 - res = _partial_ratio_impl(shorter, longer, score_cutoff / 100) + res = _partial_ratio_impl(shorter, longer, score_cutoff / 100, scorer_func) if res.score != 100 and len1 == len2: score_cutoff = max(score_cutoff, res.score) - res2 = _partial_ratio_impl(longer, shorter, score_cutoff / 100) + res2 = _partial_ratio_impl(longer, shorter, score_cutoff / 100, scorer_func) if res2.score > res.score: res = ScoreAlignment(res2.score, res2.dest_start, res2.dest_end, res2.src_start, res2.src_end) @@ -558,6 +593,7 @@ def partial_token_sort_ratio( *, processor=None, score_cutoff=None, + scorer="indel", ): """ sorts the words in the strings and calculates the fuzz.partial_ratio between them @@ -575,6 +611,8 @@ def partial_token_sort_ratio( Optional argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". 
Returns ------- @@ -596,7 +634,7 @@ def partial_token_sort_ratio( s1, s2 = conv_sequences(s1, s2) sorted_s1 = _join_splitted_sequence(sorted(_split_sequence(s1))) sorted_s2 = _join_splitted_sequence(sorted(_split_sequence(s2))) - return partial_ratio(sorted_s1, sorted_s2, score_cutoff=score_cutoff) + return partial_ratio(sorted_s1, sorted_s2, score_cutoff=score_cutoff, scorer=scorer) def partial_token_set_ratio( @@ -605,6 +643,7 @@ def partial_token_set_ratio( *, processor=None, score_cutoff=None, + scorer="indel", ): """ Compares the words in the strings based on unique and common words between them @@ -623,6 +662,8 @@ def partial_token_set_ratio( Optional argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". Returns ------- @@ -656,7 +697,7 @@ def partial_token_set_ratio( diff_ab = _join_splitted_sequence(sorted(tokens_a.difference(tokens_b))) diff_ba = _join_splitted_sequence(sorted(tokens_b.difference(tokens_a))) - return partial_ratio(diff_ab, diff_ba, score_cutoff=score_cutoff) + return partial_ratio(diff_ab, diff_ba, score_cutoff=score_cutoff, scorer=scorer) def partial_token_ratio( @@ -665,6 +706,7 @@ def partial_token_ratio( *, processor=None, score_cutoff=None, + scorer="indel", ): """ Helper method that returns the maximum of fuzz.partial_token_set_ratio and @@ -683,6 +725,8 @@ def partial_token_ratio( Optional argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". 
Returns ------- @@ -722,6 +766,7 @@ def partial_token_ratio( _join_splitted_sequence(sorted(tokens_split_a)), _join_splitted_sequence(sorted(tokens_split_b)), score_cutoff=score_cutoff, + scorer=scorer, ) # do not calculate the same partial_ratio twice @@ -735,6 +780,7 @@ def partial_token_ratio( _join_splitted_sequence(sorted(diff_ab)), _join_splitted_sequence(sorted(diff_ba)), score_cutoff=score_cutoff, + scorer=scorer, ), ) @@ -745,6 +791,7 @@ def WRatio( *, processor=None, score_cutoff=None, + scorer="indel", ): """ Calculates a weighted ratio based on the other ratio algorithms @@ -762,6 +809,8 @@ def WRatio( Optional argument for a score threshold as a float between 0 and 100. For ratio < score_cutoff 0 is returned instead. Default is 0, which deactivates this behaviour. + scorer : str, optional + The scoring algorithm to use. Options are "indel" (default) or "levenshtein". Returns ------- @@ -804,12 +853,12 @@ def WRatio( PARTIAL_SCALE = 0.9 if len_ratio <= 8.0 else 0.6 score_cutoff = max(score_cutoff, end_ratio) / PARTIAL_SCALE - end_ratio = max(end_ratio, partial_ratio(s1, s2, score_cutoff=score_cutoff) * PARTIAL_SCALE) + end_ratio = max(end_ratio, partial_ratio(s1, s2, score_cutoff=score_cutoff, scorer=scorer) * PARTIAL_SCALE) score_cutoff = max(score_cutoff, end_ratio) / UNBASE_SCALE return max( end_ratio, - partial_token_ratio(s1, s2, score_cutoff=score_cutoff, processor=None) * UNBASE_SCALE * PARTIAL_SCALE, + partial_token_ratio(s1, s2, score_cutoff=score_cutoff, processor=None, scorer=scorer) * UNBASE_SCALE * PARTIAL_SCALE, ) diff --git a/tests/test_fuzz.py b/tests/test_fuzz.py index 45472119..2747dffa 100644 --- a/tests/test_fuzz.py +++ b/tests/test_fuzz.py @@ -381,3 +381,99 @@ def testIssue257(): assert pytest.approx(score) == 98.46153846153847 score = fuzz.partial_ratio(s2, s1) assert pytest.approx(score) == 98.46153846153847 + + +class TestScorerParameter: + """Tests for the scorer parameter in partial_ratio and downstream functions.""" + + def 
test_partial_ratio_default_is_indel(self): + """Default scorer should be indel.""" + # Test both Python and C++ implementations directly + for impl in [fuzz_py, fuzz_cpp]: + result1 = impl.partial_ratio("abc", "abcdef") + result2 = impl.partial_ratio("abc", "abcdef", scorer="indel") + assert result1 == result2 + + def test_partial_ratio_levenshtein_differs(self): + """Levenshtein scorer should give different results for substitutions.""" + # From the user's example: different results for substitutions + a = "34cdef16z" + c = "09cdef78" + for impl in [fuzz_py, fuzz_cpp]: + result_indel = impl.partial_ratio(a, c, scorer="indel") + result_lev = impl.partial_ratio(a, c, scorer="levenshtein") + # Indel: 57.14, Levenshtein: 50.0 + assert pytest.approx(result_indel, rel=0.01) == 57.14 + assert pytest.approx(result_lev, rel=0.01) == 50.0 + assert result_indel != result_lev + + def test_partial_ratio_alignment_with_scorer(self): + """partial_ratio_alignment should accept scorer parameter.""" + a = "34cdef16z" + c = "09cdef78" + for impl in [fuzz_py, fuzz_cpp]: + alignment_indel = impl.partial_ratio_alignment(a, c, scorer="indel") + alignment_lev = impl.partial_ratio_alignment(a, c, scorer="levenshtein") + assert alignment_indel is not None + assert alignment_lev is not None + assert pytest.approx(alignment_indel.score, rel=0.01) == 57.14 + assert pytest.approx(alignment_lev.score, rel=0.01) == 50.0 + + def test_invalid_scorer_raises(self): + """Invalid scorer should raise ValueError.""" + with pytest.raises(ValueError): + fuzz_py.partial_ratio("abc", "def", scorer="invalid") + + def test_partial_token_sort_ratio_with_scorer(self): + """partial_token_sort_ratio should accept scorer parameter.""" + for impl in [fuzz_py, fuzz_cpp]: + result_indel = impl.partial_token_sort_ratio("abc def", "fed cba", scorer="indel") + result_lev = impl.partial_token_sort_ratio("abc def", "fed cba", scorer="levenshtein") + assert 0 <= result_indel <= 100 + assert 0 <= result_lev <= 100 + + def 
test_partial_token_set_ratio_with_scorer(self): + """partial_token_set_ratio should accept scorer parameter.""" + for impl in [fuzz_py, fuzz_cpp]: + result_indel = impl.partial_token_set_ratio("abc xyz", "def xyz", scorer="indel") + result_lev = impl.partial_token_set_ratio("abc xyz", "def xyz", scorer="levenshtein") + # With common word "xyz", both should return 100 + assert result_indel == 100 + assert result_lev == 100 + + def test_partial_token_ratio_with_scorer(self): + """partial_token_ratio should accept scorer parameter.""" + for impl in [fuzz_py, fuzz_cpp]: + result_indel = impl.partial_token_ratio("abc def", "fed cba", scorer="indel") + result_lev = impl.partial_token_ratio("abc def", "fed cba", scorer="levenshtein") + assert 0 <= result_indel <= 100 + assert 0 <= result_lev <= 100 + + def test_wratio_with_scorer(self): + """WRatio should accept scorer parameter.""" + for impl in [fuzz_py, fuzz_cpp]: + result_indel = impl.WRatio("test string", "testing strings", scorer="indel") + result_lev = impl.WRatio("test string", "testing strings", scorer="levenshtein") + assert 0 <= result_indel <= 100 + assert 0 <= result_lev <= 100 + + def test_scorer_with_score_cutoff(self): + """Scorer parameter should work with score_cutoff.""" + a = "34cdef16z" + c = "09cdef78" + for impl in [fuzz_py, fuzz_cpp]: + # Levenshtein gives 50.0, so cutoff of 55 should return 0 + result = impl.partial_ratio(a, c, scorer="levenshtein", score_cutoff=55) + assert result == 0 + # But indel gives 57.14, so cutoff of 55 should return the score + result = impl.partial_ratio(a, c, scorer="indel", score_cutoff=55) + assert pytest.approx(result, rel=0.01) == 57.14 + + def test_cpp_and_py_consistency(self): + """C++ and Python implementations should give same results for scorer parameter.""" + a = "34cdef16z" + c = "09cdef78" + for scorer_val in ["indel", "levenshtein"]: + py_result = fuzz_py.partial_ratio(a, c, scorer=scorer_val) + cpp_result = fuzz_cpp.partial_ratio(a, c, scorer=scorer_val) 
+ assert pytest.approx(py_result) == cpp_result