diff --git a/dedupe/canopy_index.py b/dedupe/canopy_index.py index 66e926b3..da3eb995 100644 --- a/dedupe/canopy_index.py +++ b/dedupe/canopy_index.py @@ -4,8 +4,8 @@ import math from typing import Iterable -import numpy from BTrees.Length import Length +from numpy import log from zope.index.text.cosineindex import CosineIndex from zope.index.text.lexicon import Lexicon from zope.index.text.setops import mass_weightedUnion @@ -37,7 +37,7 @@ def initSearch(self) -> None: docs = bucket(docs) self.index._wordinfo[wid] = docs - idf = numpy.log1p(N / len(docs)) + idf = 1 + log((1 + N) / (1 + len(docs))) term = self.lexicon._words[wid] self._wids_dict[term] = (wid, idf)