Refactored - but not finished yet
This commit is contained in:
parent
6635321512
commit
1c5c3bc246
@ -45,10 +45,14 @@ class MatchError(IndexError):
|
|||||||
"A suitable match could not be found"
|
"A suitable match could not be found"
|
||||||
def __init__(self, items = None, tofind = ''):
|
def __init__(self, items = None, tofind = ''):
|
||||||
"Init the parent with the message"
|
"Init the parent with the message"
|
||||||
if items is None:
|
self.tofind = tofind
|
||||||
items = []
|
self.items = items
|
||||||
|
if self.items is None:
|
||||||
|
self.items = []
|
||||||
|
|
||||||
IndexError.__init__(self,
|
IndexError.__init__(self,
|
||||||
"Could not find '%s' in '%s'"% (tofind, items))
|
"Could not find '%s' in '%s'"% (tofind, self.items))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -88,48 +92,22 @@ def find_best_match(search_text, item_texts, items):
|
|||||||
"Return the item that best matches the search_text"
|
"Return the item that best matches the search_text"
|
||||||
search_text = _clean_text(search_text)
|
search_text = _clean_text(search_text)
|
||||||
|
|
||||||
|
|
||||||
|
text_item_map = UniqueDict()
|
||||||
# Clean each item, make it unique and map to
|
# Clean each item, make it unique and map to
|
||||||
# to the item index
|
# to the item index
|
||||||
item_index_map = _build_unique_index_map(item_texts)
|
for text, item in zip(item_texts, items):
|
||||||
|
text_item_map[text] = item
|
||||||
|
|
||||||
ratios, best_ratio, best_text = \
|
ratios, best_ratio, best_text = \
|
||||||
_get_match_ratios(item_index_map.keys(), search_text)
|
_get_match_ratios(text_item_map.keys(), search_text)
|
||||||
|
|
||||||
if best_ratio < .5:
|
if best_ratio < .5:
|
||||||
raise MatchError(items = item_index_map.keys(), tofind = search_text)
|
raise MatchError(items = text_item_map.keys(), tofind = search_text)
|
||||||
|
|
||||||
return items[item_index_map[best_text]]
|
return text_item_map[best_text]
|
||||||
|
|
||||||
|
|
||||||
#====================================================================
|
|
||||||
def _build_unique_index_map(items):
|
|
||||||
"""Build a map of item to item index making sure that each is unique"""
|
|
||||||
mapped_items = {}
|
|
||||||
|
|
||||||
for i, text in enumerate(items):
|
|
||||||
text = _clean_text(text)
|
|
||||||
|
|
||||||
# no duplicates so just store it without modification
|
|
||||||
if text not in mapped_items:
|
|
||||||
mapped_items[text] = i
|
|
||||||
|
|
||||||
# else this item appears multiple times
|
|
||||||
else:
|
|
||||||
# find unique text
|
|
||||||
unique_text = text
|
|
||||||
counter = 2
|
|
||||||
while unique_text in mapped_items:
|
|
||||||
unique_text = text + str(counter)
|
|
||||||
counter += 1
|
|
||||||
|
|
||||||
mapped_items[unique_text] = i
|
|
||||||
|
|
||||||
if not mapped_items.has_key(text + "0"):
|
|
||||||
mapped_items[text + "0"] = mapped_items[text]
|
|
||||||
mapped_items[text + "1"] = mapped_items[text]
|
|
||||||
|
|
||||||
return mapped_items
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -168,19 +146,114 @@ def _get_control_names(control):
|
|||||||
|
|
||||||
# if it has some character text then add it base on that
|
# if it has some character text then add it base on that
|
||||||
# and based on that with friendly class name appended
|
# and based on that with friendly class name appended
|
||||||
if _clean_text(control.Text):
|
cleaned = _clean_text(control.Text)
|
||||||
names.append(control.Text)
|
if cleaned and cleaned not in names:
|
||||||
names.append(control.Text + control.FriendlyClassName)
|
names.append(cleaned)
|
||||||
|
name_fclass = cleaned + control.FriendlyClassName
|
||||||
|
|
||||||
# return the names (either 1 or 3 strings)
|
if name_fclass not in names:
|
||||||
|
names.append(name_fclass)
|
||||||
|
|
||||||
|
# return the names
|
||||||
return names
|
return names
|
||||||
|
|
||||||
|
|
||||||
#TODO: Move uniquefying code out of this function and use
|
|
||||||
# _build_unique_index_map() to do it. (if that functions needs changing
|
|
||||||
# then do it and modify functions that call it if necessary also!
|
|
||||||
#====================================================================
|
#====================================================================
|
||||||
def find_best_control_match(search_text, controls):
|
class UniqueDict(dict):
|
||||||
|
|
||||||
|
def __setitem__(self, text, item):
|
||||||
|
|
||||||
|
# this text is already in the map
|
||||||
|
# so we need to make it unique
|
||||||
|
if text in self:
|
||||||
|
# find next unique text after text1
|
||||||
|
unique_text = text
|
||||||
|
counter = 2
|
||||||
|
while unique_text in self:
|
||||||
|
unique_text = text + str(counter)
|
||||||
|
counter += 1
|
||||||
|
|
||||||
|
# now we also need to make sure the original item
|
||||||
|
# is under text0 and text1 also!
|
||||||
|
if text + '0' not in self:
|
||||||
|
dict.__setitem__(self, text+'0', self[text])
|
||||||
|
dict.__setitem__(self, text+'1', self[text])
|
||||||
|
|
||||||
|
# now that we don't need original 'text' anymore
|
||||||
|
# replace it with the uniq text
|
||||||
|
text = unique_text
|
||||||
|
|
||||||
|
# add our current item
|
||||||
|
dict.__setitem__(self, text, item)
|
||||||
|
|
||||||
|
|
||||||
|
def FindBestMatches(self, search_text):
|
||||||
|
# now time to figure out the matching
|
||||||
|
ratio_calc = difflib.SequenceMatcher()
|
||||||
|
ratio_calc.set_seq1(search_text)
|
||||||
|
|
||||||
|
|
||||||
|
ratios = {}
|
||||||
|
best_ratio = 0
|
||||||
|
best_texts = []
|
||||||
|
|
||||||
|
for text in self:
|
||||||
|
# set up the SequenceMatcher with other text
|
||||||
|
ratio_calc.set_seq2(text)
|
||||||
|
|
||||||
|
# calculate ratio and store it
|
||||||
|
ratios[text] = ratio_calc.ratio()
|
||||||
|
|
||||||
|
# if this is the best so far then update best stats
|
||||||
|
if ratios[text] > best_ratio:
|
||||||
|
best_ratio = ratios[text]
|
||||||
|
best_texts = [text]
|
||||||
|
|
||||||
|
elif ratios[text] == best_ratio:
|
||||||
|
best_texts.append(text)
|
||||||
|
|
||||||
|
return best_ratio, best_texts
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def GetControlMatchRatio(text, ctrl):
|
||||||
|
# get the texts for the control
|
||||||
|
ctrl_names = _get_control_names(ctrl)
|
||||||
|
|
||||||
|
#get the best match for these
|
||||||
|
matcher = UniqueDict()
|
||||||
|
for name in ctrl_names:
|
||||||
|
matcher[name] = ctrl
|
||||||
|
|
||||||
|
best_ratio, unused = matcher.FindBestMatches(text)
|
||||||
|
|
||||||
|
return best_ratio
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_controls_ratios(search_text, controls):
|
||||||
|
name_control_map = UniqueDict()
|
||||||
|
|
||||||
|
# collect all the possible names for all controls
|
||||||
|
# and build a list of them
|
||||||
|
for ctrl in controls:
|
||||||
|
ctrl_names = _get_control_names(ctrl)
|
||||||
|
|
||||||
|
# for each of the names
|
||||||
|
for name in ctrl_names:
|
||||||
|
name_control_map[name] = ctrl
|
||||||
|
|
||||||
|
match_ratios, best_ratio, best_text = \
|
||||||
|
_get_match_ratios(name_control_map.keys(), search_text)
|
||||||
|
|
||||||
|
return match_ratios, best_ratio, best_text,
|
||||||
|
|
||||||
|
|
||||||
|
#====================================================================
|
||||||
|
def find_best_control_matches(search_text, controls):
|
||||||
"""Returns the control that is the the best match to search_text
|
"""Returns the control that is the the best match to search_text
|
||||||
|
|
||||||
This is slightly differnt from find_best_match in that it builds
|
This is slightly differnt from find_best_match in that it builds
|
||||||
@ -193,52 +266,28 @@ def find_best_control_match(search_text, controls):
|
|||||||
then it will just add "ListView".
|
then it will just add "ListView".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name_control_map = {}
|
name_control_map = UniqueDict()
|
||||||
|
|
||||||
# collect all the possible names for all controls
|
# collect all the possible names for all controls
|
||||||
# and build a list of them
|
# and build a list of them
|
||||||
for ctrl in controls:
|
for ctrl in controls:
|
||||||
ctrl_names = _get_control_names(ctrl)
|
ctrl_names = _get_control_names(ctrl)
|
||||||
ctrl_names = [_clean_text(name) for name in ctrl_names]
|
|
||||||
|
|
||||||
# remove duplicates
|
|
||||||
ctrl_names = list(set(ctrl_names))
|
|
||||||
|
|
||||||
# for each of the names
|
# for each of the names
|
||||||
for name in ctrl_names:
|
for name in ctrl_names:
|
||||||
|
name_control_map[name] = ctrl
|
||||||
|
|
||||||
# if its not there already then just add it
|
best_ratio, best_texts = name_control_map.FindBestMatches(search_text)
|
||||||
if not name_control_map.has_key(name):
|
|
||||||
|
|
||||||
name_control_map[name] = ctrl
|
#if len(best_texts)
|
||||||
|
|
||||||
# else this item appears multiple times
|
#match_ratios, best_ratio, best_text = \
|
||||||
else:
|
# _get_match_ratios(name_control_map.keys(), search_text)
|
||||||
# find unique name
|
|
||||||
unique_text = name
|
|
||||||
counter = 2
|
|
||||||
while unique_text in name_control_map:
|
|
||||||
unique_text = name + str(counter)
|
|
||||||
counter += 1
|
|
||||||
|
|
||||||
# add it with that unique text
|
|
||||||
name_control_map[unique_text] = ctrl
|
|
||||||
|
|
||||||
# and if this was the first time that we noticied that
|
|
||||||
# it was a duplicated name then add new items based on the
|
|
||||||
# duplicated name but add '0' and '1'
|
|
||||||
if not name_control_map.has_key(name + "0"):
|
|
||||||
name_control_map[name + "0"] = name_control_map[name]
|
|
||||||
name_control_map[name + "1"] = name_control_map[name]
|
|
||||||
|
|
||||||
|
|
||||||
match_ratios, best_ratio, best_text = \
|
|
||||||
_get_match_ratios(name_control_map.keys(), search_text)
|
|
||||||
|
|
||||||
if best_ratio < .5:
|
if best_ratio < .5:
|
||||||
raise MatchError(items = name_control_map.keys(), tofind = search_text)
|
raise MatchError(items = name_control_map.keys(), tofind = search_text)
|
||||||
|
|
||||||
return name_control_map[best_text]
|
return [name_control_map[best_text] for best_text in best_texts]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user