Multi-scale Template Match Vs. Text Detection
I'm trying to automate the navigation of a website to grab data and download files using PyAutoGUI to detect images and buttons, but I'm having trouble using this on other people's
Solution 1:
Following my comment above, this is what the modified function could look like:
# Functions to search for resized versions of images.
# Assumes these names are already imported: cv2, numpy as np, pyautogui, pyscreeze.


def template_match_with_scaling(image, gs=True, confidence=0.8, scalingrange=None):
    """Locate ``image`` on screen, trying independent width/height scale factors.

    The template is resized to every (scalex, scaley) combination drawn from
    ``scalingrange`` and matched against a fresh screenshot using the
    normalized correlation coefficient (``cv2.TM_CCOEFF_NORMED``).

    Args:
        image: Template image, passed to ``pyscreeze._load_cv2``.
        gs: If truthy, both the template and the screenshot are matched in
            grayscale.
        confidence: The best correlation must be strictly greater than this
            value for the match to be accepted.
        scalingrange: Iterable of scale factors applied to both the width and
            the height. ``None`` means "unscaled only" (a single factor 1.0).

    Returns:
        A ``pyscreeze.Box`` (left, top, width, height) around the best match,
        or ``None`` if no scale produced a correlation above ``confidence``.
    """
    # Materialize the factors: the range is iterated once per outer iteration,
    # so a one-shot iterable would be exhausted after the first pass.
    scales = [1.0] if scalingrange is None else list(scalingrange)

    templateim = pyscreeze._load_cv2(image, grayscale=gs)  # load the template
    tH, tW = templateim.shape[:2]  # numpy image shape is (rows, cols) = (height, width)

    # Take a screenshot (PIL gives RGB) and convert it to OpenCV's BGR order.
    screenim = cv2.cvtColor(np.array(pyautogui.screenshot()), cv2.COLOR_RGB2BGR)
    if gs:
        # Match in grayscale, consistent with how the template was loaded.
        screenim = cv2.cvtColor(screenim, cv2.COLOR_BGR2GRAY)
    screenH, screenW = screenim.shape[:2]

    # Bookkeeping for the best match so far: (correlation, top-left, width, height).
    found = None
    for scalex in scales:
        width = int(tW * scalex)
        if width < 1 or width > screenW:
            # cv2.resize rejects empty sizes and cv2.matchTemplate rejects
            # templates larger than the searched image.
            continue
        for scaley in scales:
            height = int(tH * scaley)
            if height < 1 or height > screenH:
                continue
            # cv2.resize takes the target size as (width, height).
            resizedtemplate = cv2.resize(templateim, (width, height))
            result = cv2.matchTemplate(screenim, resizedtemplate, cv2.TM_CCOEFF_NORMED)
            # minMaxLoc returns (minVal, maxVal, minLoc, maxLoc); for a
            # correlation score only the maximum and its location matter.
            _, maxVal, _, maxLoc = cv2.minMaxLoc(result)
            if found is None or maxVal > found[0]:
                found = (maxVal, maxLoc, width, height)

    if found is None:
        # No usable scale factor (empty range, or every size was invalid).
        return None

    maxVal, maxLoc, width, height = found
    print('maxVal= ', maxVal)
    if maxVal > confidence:
        # Report the size of the template that actually matched, so the width
        # uses the x scale and the height uses the y scale.
        return pyscreeze.Box(int(maxLoc[0]), int(maxLoc[1]), width, height)
    return None


def locate_center_with_scaling(image, gs=True, **kwargs):
    """Return the center point of the best scaled match of ``image`` on screen.

    Extra keyword arguments (``confidence``, ``scalingrange``) are forwarded to
    ``template_match_with_scaling``.

    Raises:
        Exception: If no match above the confidence threshold is found.
    """
    loc = template_match_with_scaling(image, gs=gs, **kwargs)
    if loc is None:
        raise Exception("Image not found")
    return pyautogui.center(loc)
# We will try to detect the small description box, whose width and height
# are scaled down by 0.54 and 0.47 respectively.
im = 'DescriptionBox.png'

try:
    unscaledLocation = pyautogui.locateOnScreen(im, grayscale=True, confidence=0.8)
except pyautogui.ImageNotFoundException:
    # Newer PyAutoGUI versions raise instead of returning None when the image
    # is not found; treat both outcomes the same way.
    unscaledLocation = None

srange = np.linspace(0.4, 0.6, num=20)  # scale width and height in this range

if unscaledLocation is None:
    print("Looking for Description Box.")
    # locate_center_with_scaling raises if nothing is found, so reaching the
    # next line means a location was returned.
    scaledLocation = locate_center_with_scaling(im, scalingrange=srange)
    print(f'Found a resized version of Description Box at ({scaledLocation[0]},{scaledLocation[1]})')
    pyautogui.moveTo(scaledLocation[0], scaledLocation[1])
We need to be mindful of two things:
- `template_match_with_scaling` now executes a double loop, one over each dimension, so it will take some time to detect the template image. To amortize the detection time, we should save the scale parameters for width and height after the first detection, and scale all template images by these parameters for subsequent detections.
- To be able to detect the template efficiently, we need to set the `scalingrange` input of `template_match_with_scaling` to an appropriate range of values. If the range is too narrow or doesn't contain enough values, we will not be able to detect the template. If it is too large, detection will take a long time.
Post a Comment for "Multi-scale Template Match Vs. Text Detection"