Refactored - but not finished yet

This commit is contained in:
markm 2006-01-26 22:21:33 +00:00
parent 6635321512
commit 1c5c3bc246

@ -45,10 +45,14 @@ class MatchError(IndexError):
"A suitable match could not be found" "A suitable match could not be found"
def __init__(self, items = None, tofind = ''): def __init__(self, items = None, tofind = ''):
"Init the parent with the message" "Init the parent with the message"
if items is None: self.tofind = tofind
items = [] self.items = items
if self.items is None:
self.items = []
IndexError.__init__(self, IndexError.__init__(self,
"Could not find '%s' in '%s'"% (tofind, items)) "Could not find '%s' in '%s'"% (tofind, self.items))
@ -88,48 +92,22 @@ def find_best_match(search_text, item_texts, items):
"Return the item that best matches the search_text" "Return the item that best matches the search_text"
search_text = _clean_text(search_text) search_text = _clean_text(search_text)
text_item_map = UniqueDict()
# Clean each item, make it unique and map to # Clean each item, make it unique and map to
# to the item index # to the item index
item_index_map = _build_unique_index_map(item_texts) for text, item in zip(item_texts, items):
text_item_map[text] = item
ratios, best_ratio, best_text = \ ratios, best_ratio, best_text = \
_get_match_ratios(item_index_map.keys(), search_text) _get_match_ratios(text_item_map.keys(), search_text)
if best_ratio < .5: if best_ratio < .5:
raise MatchError(items = item_index_map.keys(), tofind = search_text) raise MatchError(items = text_item_map.keys(), tofind = search_text)
return items[item_index_map[best_text]] return text_item_map[best_text]
#====================================================================
def _build_unique_index_map(items):
    """Build a map of cleaned item text to item index with unique keys.

    Each text in ``items`` is passed through ``_clean_text`` before being
    used as a key.  The first occurrence of a text is stored under the
    cleaned text itself.  A repeated text is stored under the cleaned text
    followed by a counter (starting at 2), and if ``text + "0"`` is not yet
    a key then the index stored under the plain text is also made available
    under ``text + "0"`` and ``text + "1"``.

    Returns the dictionary mapping the (uniquified) texts to indices.
    """
    mapped_items = {}

    for i, text in enumerate(items):
        text = _clean_text(text)

        # no duplicates so just store it without modification
        if text not in mapped_items:
            mapped_items[text] = i
            continue

        # this item appears multiple times - find a key that is not
        # in use yet by appending an increasing counter
        unique_text = text
        counter = 2
        while unique_text in mapped_items:
            unique_text = text + str(counter)
            counter += 1
        mapped_items[unique_text] = i

        # alias the index stored under the plain text as text0 and text1
        # (uses the 'in' operator rather than the deprecated dict.has_key,
        # matching the membership tests above)
        if text + "0" not in mapped_items:
            mapped_items[text + "0"] = mapped_items[text]
            mapped_items[text + "1"] = mapped_items[text]

    return mapped_items
@ -168,19 +146,114 @@ def _get_control_names(control):
# if it has some character text then add it base on that # if it has some character text then add it base on that
# and based on that with friendly class name appended # and based on that with friendly class name appended
if _clean_text(control.Text): cleaned = _clean_text(control.Text)
names.append(control.Text) if cleaned and cleaned not in names:
names.append(control.Text + control.FriendlyClassName) names.append(cleaned)
name_fclass = cleaned + control.FriendlyClassName
# return the names (either 1 or 3 strings) if name_fclass not in names:
names.append(name_fclass)
# return the names
return names return names
#TODO: Move the uniquifying code out of this function and use
# _build_unique_index_map() to do it. (If that function needs changing
# then do it, and also modify the functions that call it if necessary.)
#==================================================================== #====================================================================
def find_best_control_match(search_text, controls): class UniqueDict(dict):
def __setitem__(self, text, item):
# this text is already in the map
# so we need to make it unique
if text in self:
# find next unique text after text1
unique_text = text
counter = 2
while unique_text in self:
unique_text = text + str(counter)
counter += 1
# now we also need to make sure the original item
# is under text0 and text1 also!
if text + '0' not in self:
dict.__setitem__(self, text+'0', self[text])
dict.__setitem__(self, text+'1', self[text])
# now that we don't need original 'text' anymore
# replace it with the uniq text
text = unique_text
# add our current item
dict.__setitem__(self, text, item)
def FindBestMatches(self, search_text):
    """Return the best similarity ratio and all keys that achieve it.

    Every key of the dictionary is compared with search_text using
    difflib.SequenceMatcher.  The result is a tuple (best_ratio,
    best_texts) where best_texts lists each key whose ratio equals
    best_ratio.  With no keys the result is (0, []).
    """
    matcher = difflib.SequenceMatcher()
    matcher.set_seq1(search_text)

    top_ratio = 0
    top_keys = []

    for key in self:
        # only the second sequence changes between comparisons
        matcher.set_seq2(key)
        score = matcher.ratio()

        if score > top_ratio:
            # strictly better: restart the list of winners
            top_ratio, top_keys = score, [key]
        elif score == top_ratio:
            # ties with the current best are kept too
            top_keys.append(key)

    return top_ratio, top_keys
def GetControlMatchRatio(text, ctrl):
    """Return the best match ratio between text and the names of ctrl.

    The names come from _get_control_names(ctrl); each is added to a
    UniqueDict and the ratio reported by its FindBestMatches(text) is
    returned (the matching texts themselves are discarded).
    """
    candidates = UniqueDict()

    # register every name the control can be known by
    for ctrl_name in _get_control_names(ctrl):
        candidates[ctrl_name] = ctrl

    # only the ratio is of interest here, not which names produced it
    ratio, _matched_names = candidates.FindBestMatches(text)
    return ratio
def get_controls_ratios(search_text, controls):
    """Return the match ratios of search_text against all control names.

    Every name from _get_control_names() for every control is added to a
    UniqueDict, and the keys of that dictionary are passed to
    _get_match_ratios() together with search_text.  The three values it
    produces are returned as a tuple (match_ratios, best_ratio, best_text).
    """
    names_to_controls = UniqueDict()

    # gather all the possible names of all the controls
    for control in controls:
        for control_name in _get_control_names(control):
            names_to_controls[control_name] = control

    all_ratios, top_ratio, top_text = _get_match_ratios(
        names_to_controls.keys(), search_text)

    return (all_ratios, top_ratio, top_text)
#====================================================================
def find_best_control_matches(search_text, controls):
"""Returns the control that is the the best match to search_text """Returns the control that is the the best match to search_text
This is slightly differnt from find_best_match in that it builds This is slightly differnt from find_best_match in that it builds
@ -193,52 +266,28 @@ def find_best_control_match(search_text, controls):
then it will just add "ListView". then it will just add "ListView".
""" """
name_control_map = {} name_control_map = UniqueDict()
# collect all the possible names for all controls # collect all the possible names for all controls
# and build a list of them # and build a list of them
for ctrl in controls: for ctrl in controls:
ctrl_names = _get_control_names(ctrl) ctrl_names = _get_control_names(ctrl)
ctrl_names = [_clean_text(name) for name in ctrl_names]
# remove duplicates
ctrl_names = list(set(ctrl_names))
# for each of the names # for each of the names
for name in ctrl_names: for name in ctrl_names:
name_control_map[name] = ctrl
# if its not there already then just add it best_ratio, best_texts = name_control_map.FindBestMatches(search_text)
if not name_control_map.has_key(name):
name_control_map[name] = ctrl #if len(best_texts)
# else this item appears multiple times #match_ratios, best_ratio, best_text = \
else: # _get_match_ratios(name_control_map.keys(), search_text)
# find unique name
unique_text = name
counter = 2
while unique_text in name_control_map:
unique_text = name + str(counter)
counter += 1
# add it with that unique text
name_control_map[unique_text] = ctrl
# and if this was the first time that we noticied that
# it was a duplicated name then add new items based on the
# duplicated name but add '0' and '1'
if not name_control_map.has_key(name + "0"):
name_control_map[name + "0"] = name_control_map[name]
name_control_map[name + "1"] = name_control_map[name]
match_ratios, best_ratio, best_text = \
_get_match_ratios(name_control_map.keys(), search_text)
if best_ratio < .5: if best_ratio < .5:
raise MatchError(items = name_control_map.keys(), tofind = search_text) raise MatchError(items = name_control_map.keys(), tofind = search_text)
return name_control_map[best_text] return [name_control_map[best_text] for best_text in best_texts]