Tried out Levenshtein matching, but commented it out again, as that doesn't seem to be where things are slow.
This commit is contained in:
parent
d6d7de42b0
commit
c51d7a2c78
@ -26,6 +26,11 @@ import re
|
||||
import difflib
|
||||
|
||||
import fuzzydict
|
||||
#import ctypes
|
||||
#import ldistance
|
||||
#levenshtein_distance = ctypes.cdll.levenshtein.levenshtein_distance
|
||||
#levenshtein_distance = ldistance.distance
|
||||
|
||||
|
||||
# need to use sets.Set for python 2.3 compatibility
|
||||
import sets
|
||||
@ -66,6 +71,11 @@ def _get_match_ratios(texts, match_against):
|
||||
# set up the SequenceMatcher with other text
|
||||
ratio_calc.set_seq2(text)
|
||||
|
||||
# try using the levenshtein distance instead
|
||||
#lev_dist = levenshtein_distance(unicode(match_against), unicode(text))
|
||||
#ratio = 1 - lev_dist / 10.0
|
||||
#ratios[text] = ratio
|
||||
|
||||
# calculate ratio and store it
|
||||
ratios[text] = ratio_calc.ratio()
|
||||
|
||||
@ -332,6 +342,13 @@ class UniqueDict(dict):
|
||||
# calculate ratio and store it
|
||||
ratios[text_] = ratio_calc.ratio()
|
||||
|
||||
|
||||
# try using the levenshtein distance instead
|
||||
#lev_dist = levenshtein_distance(unicode(search_text), unicode(text))
|
||||
#ratio = 1 - lev_dist / 10.0
|
||||
#ratios[text_] = ratio
|
||||
#print "%5s" %("%0.2f"% ratio), search_text, `text`
|
||||
|
||||
# if this is the best so far then update best stats
|
||||
if ratios[text_] > best_ratio:
|
||||
best_ratio = ratios[text_]
|
||||
|
Loading…
Reference in New Issue
Block a user