# pywinauto/Branches/OriginalPydlgChecks/findbestmatch.py

import re
import difflib


#====================================================================
class MatchError(IndexError):
	"""Raised when no acceptable match can be found for a search text.

	Derives from IndexError, so callers that catch IndexError also
	catch this exception.

	Attributes:
		items: the candidates that were searched.
		tofind: the text that was being looked for.
	"""

	def __init__(self, msg = '', items = None, tofind = ''):
		"""Store the searched candidates and the text that was not found.

		msg is passed to the base exception, items defaults to a new
		empty list and tofind defaults to the empty string.
		"""
		IndexError.__init__(self, msg)

		# Create the default list per instance: a mutable default in the
		# signature would be shared (and mutated) across all instances.
		if items is None:
			items = []

		self.items = items
		self.tofind = tofind

	def __str__(self):
		"""Return a message naming the search text and the candidates."""
		return "Could not find '%s' in '%s'"% (self.tofind, self.items)

#====================================================================
def clean_text(text):
	"""Return a normalised, lowercase form of text for comparisons.

	Everything from the first tab up to the end of that line is dropped,
	then whitespace and every character that is neither a word
	character nor a space is removed, and the result is lowercased.
	"""
	tab_and_rest = re.compile(r"\t.*")
	unwanted = re.compile(r"[^\w ]|\s+")

	# cut off the tab and whatever follows it on the same line
	leading_part = tab_and_rest.sub("", text)

	# strip whitespace and non alphanumeric characters
	stripped = unwanted.sub("", leading_part)

	return stripped.lower()


#====================================================================
def build_unique_index_map(items):
	"""Map a unique, cleaned form of each item text to the item's index.

	Every text is normalised with clean_text().  The first occurrence of
	a cleaned text is stored under that text.  Each later occurrence is
	stored under the cleaned text plus the first unused numeric suffix,
	counting up from 2.  The first time a duplicate is noticed, the
	original occurrence is additionally stored under the cleaned text
	plus "0" and plus "1".

	Returns a dict of unique text -> index into items.
	"""
	mapped_items = {}

	for i, text in enumerate(items):
		text = clean_text(text)

		# no duplicates so just store it without modification
		if text not in mapped_items:
			mapped_items[text] = i
			continue

		# this item appears multiple times - find the first free name
		unique_text = text
		counter = 2
		while unique_text in mapped_items:
			unique_text = text + str(counter)
			counter += 1

		mapped_items[unique_text] = i

		# first duplicate seen for this text: alias the original entry
		# ('in' rather than dict.has_key(), which Python 3 removed)
		if text + "0" not in mapped_items:
			mapped_items[text + "0"] = mapped_items[text]
			mapped_items[text + "1"] = mapped_items[text]

	return mapped_items


#====================================================================
def find_best_match(search_text, item_texts, items):
	"""Return the entry of items whose text best matches search_text.

	item_texts holds the texts to compare against; the result is the
	element of items at the index of the closest text.  Matching is
	done on cleaned texts using difflib.get_close_matches().

	Raises MatchError (carrying item_texts and the cleaned search text)
	when difflib reports no close match.
	"""
	search_text = clean_text(search_text)

	# cleaned, de-duplicated text -> index of the item it came from
	index_by_text = build_unique_index_map(item_texts)

	# candidates come back ordered best first
	candidates = difflib.get_close_matches (search_text, index_by_text.keys())

	try:
		winner = candidates[0]
	except IndexError:
		raise MatchError(items = item_texts, tofind = search_text)

	return items[index_by_text[winner]]


#====================================================================
def get_control_names(control):
	"""Return the list of names a control can be looked up by.

	If the control has a truthy 'ref' attribute, the names are taken
	from that reference control instead.  The result always starts with
	the friendly class name.  When the control's text is non-empty after
	clean_text(), the text itself and the text followed by the friendly
	class name are added as well, giving either 1 or 3 strings.
	"""
	# a reference control, when present, takes precedence
	if hasattr(control, 'ref') and control.ref:
		control = control.ref

	# the friendly class name is always a candidate
	names = [control.FriendlyClassName]

	# nothing usable in the text - the class name is all there is
	if not clean_text(control.Text):
		return names

	# the text on its own, and the text with the class name appended
	names.extend([
		control.Text,
		control.Text + control.FriendlyClassName])

	return names


#====================================================================
def junk_func(char):
	"""Return True if char occurs in the fixed string of junk characters.

	The check is a string containment test, so the empty string and any
	multi-character run that appears verbatim in the junk string also
	yield True.
	"""
	junk_chars = ':"/ \t\n\r][{}=-\\|!@#$%^&*,.<>?/()'
	return char in junk_chars


#====================================================================
def clean_text2(text): # doesn't change text to lowercase
	"""Return text with its tab suffix and all non-word characters removed.

	Everything from the first tab up to the end of that line is dropped,
	then every character that is not a word character is removed.  The
	case of the remaining characters is left untouched.
	"""
	tab_and_rest = re.compile(r"\t.*")
	non_word = re.compile(r"\W")

	# cut off the tab and whatever follows it on the same line
	leading_part = tab_and_rest.sub("", text)

	# strip whitespace and non alphanumeric characters
	return non_word.sub("", leading_part)


#====================================================================
def find_best_control_match(search_text, controls):
	"""Return the control whose name most closely matches search_text.

	Every control contributes the names returned by get_control_names(),
	each cleaned with clean_text2().  A name already taken by an earlier
	control is stored under the name plus the first unused numeric
	suffix (counting up from 2); the first time that happens the earlier
	control is also stored under the name plus "0" and plus "1".

	The cleaned search text is compared with every stored name using
	difflib.SequenceMatcher and the control with the highest ratio wins;
	on equal ratios the name stored first is kept.

	Raises MatchError (carrying the list of known names and the original
	search_text) when the best ratio is below 0.5, which includes the
	case of no controls at all.
	"""
	name_control_map = {}

	# collect all the possible names for all controls
	for c in controls:
		# clean the names and drop duplicates, keeping their original
		# order so the resulting map does not depend on set ordering
		ctrl_names = []
		for n in get_control_names(c):
			n = clean_text2(n)
			if n not in ctrl_names:
				ctrl_names.append(n)

		for n in ctrl_names:

			# if it's not there already then just add it
			# ('in' rather than dict.has_key(), which Python 3 removed)
			if n not in name_control_map:
				name_control_map[n] = c
				continue

			# this name appears multiple times - find a unique name
			unique_text = n
			counter = 2
			while unique_text in name_control_map:
				unique_text = n + str(counter)
				counter += 1

			# add it with that unique text
			name_control_map[unique_text] = c

			# and if this was the first time that we noticed that
			# it was a duplicated name then add new items based on the
			# duplicated name but add '0' and '1'
			if n + "0" not in name_control_map:
				name_control_map[n + "0"] = name_control_map[n]
				name_control_map[n + "1"] = name_control_map[n]

	# now time to figure out the matching
	ratio_calc = difflib.SequenceMatcher()
	ratio_calc.set_seq1(clean_text2(search_text))

	best_ratio = 0
	best_control = None

	for name, control in name_control_map.items():
		ratio_calc.set_seq2(name)

		# quick_ratio() is a cheap upper bound on ratio(), so a name that
		# cannot beat the current best is skipped without the full compare
		if ratio_calc.quick_ratio() <= best_ratio:
			continue

		# record the real ratio: storing the upper bound instead would
		# inflate the best score and wrongly reject later, better names
		ratio = ratio_calc.ratio()
		if ratio > best_ratio:
			best_ratio = ratio
			best_control = control

	if best_ratio < .5:
		# pass a real list so the error message shows the names themselves
		raise MatchError(items = list(name_control_map), tofind = search_text)

	return best_control