can resolve captcha with 2captcha

This commit is contained in:
PAN Lei
2022-05-23 13:12:55 +02:00
parent e14ed405fb
commit 380398c768
2 changed files with 43 additions and 110 deletions
+39 -68
View File
@@ -1,82 +1,53 @@
import random
# for recaptcha
import urllib
from speech_recognition import Recognizer, AudioFile
import logging
import random
import os
import re
import time
import requests
# Literal reply sent by the 2captcha result endpoint while a solution is still
# pending. The spelling ("CAPCHA") is the API's own and must not be corrected.
CAPCHA_NOT_READY = "CAPCHA_NOT_READY"
# Matches a `data-sitekey="<key>"` attribute in page HTML, including the
# surrounding double quotes. Site keys may contain "-" and "_" in addition to
# letters and digits, so both are part of the character class; without them
# the match stops at the first such character and yields a truncated key.
REGEX_DATA_SITE_KEY = r'data-sitekey=["a-zA-Z0-9_-]+'
class SolveCaptcha:
    """Solve the reCAPTCHA found on a browser page through the 2captcha HTTP API.

    The site key is read from the page HTML, submitted to 2captcha together
    with the page URL, and the service is then polled until it returns a
    solution token, which is handed to a caller-supplied callback.
    """

    # 2captcha endpoints: submit a task / fetch its result.
    SUBMIT_URL = "https://2captcha.com/in.php"
    RESULT_URL = "https://2captcha.com/res.php"
    # Environment variable consulted for the API key when none is passed in.
    API_KEY_ENV_VAR = "TWOCAPTCHA_API_KEY"
    # NOTE(review): this key used to be embedded in the request URLs. It is
    # kept only as a last-resort fallback so existing setups keep working;
    # it should be rotated and removed from the source.
    LEGACY_API_KEY = "e7e3cd0977aba2dab49e0ea052ca58b1"
    # Seconds to wait after submitting before the first poll.
    INITIAL_WAIT_SECONDS = 15
    # Seconds to wait between two polls that both came back "not ready".
    POLL_INTERVAL_SECONDS = 5
    # Upper bound on polls so a stuck task cannot block the caller forever.
    MAX_POLLS = 60
    # Per-request network timeout, in seconds.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, page, api_key=None):
        """Bind the solver to a page.

        Args:
            page: Browser page object. The class uses its ``content()``,
                ``url`` and ``wait_for_timeout()`` members (looks like a
                Playwright sync ``Page`` -- confirm against the caller).
            api_key: 2captcha API key. When omitted, the value of the
                ``TWOCAPTCHA_API_KEY`` environment variable is used, falling
                back to ``LEGACY_API_KEY``.
        """
        self.page = page
        self.logger = logging.getLogger("SolveCaptcha")
        self.api_key = api_key or os.environ.get(
            self.API_KEY_ENV_VAR, self.LEGACY_API_KEY)
        # Id of the most recently submitted 2captcha task, if any.
        self.captcha_id = None

    def delay(self):
        """Pause via the page for a random 1000, 2000 or 3000 time units.

        The value is passed to ``page.wait_for_timeout`` (presumably
        milliseconds -- confirm against the page implementation).
        """
        self.page.wait_for_timeout(random.randint(1, 3) * 1000)

    def start(self, handle_solution_received):
        """Find the page's reCAPTCHA site key and have 2captcha solve it.

        Nothing is submitted unless exactly one ``data-sitekey`` attribute is
        found in the page HTML; that case is logged and the method returns.

        Args:
            handle_solution_received: Callable invoked with the solution
                token (a string) once 2captcha has produced it.

        Raises:
            RuntimeError: 2captcha answered with an error reply.
            TimeoutError: No solution arrived within the polling budget.
        """
        self.logger.info("start to resolve captcha")
        matches = re.findall(REGEX_DATA_SITE_KEY, self.page.content())
        self.logger.info(matches)
        if len(matches) != 1:
            self.logger.warning(
                "expected exactly one data-sitekey attribute, found %d",
                len(matches))
            return
        # A match looks like `data-sitekey="<key>"`: keep what follows the
        # "=" and drop the surrounding quotes.
        key = matches[0].split("=")[-1].replace("\"", "")
        self.logger.info("key is : %s", key)
        self.solve_captcha(key, handle_solution_received)

    def solve_captcha(self, google_key: str, handle_solution_received):
        """Submit a reCAPTCHA task to 2captcha and wait for its solution.

        Args:
            google_key: The reCAPTCHA site key of the page.
            handle_solution_received: Callable invoked with the solution
                token (a string).

        Raises:
            RuntimeError: 2captcha answered with anything other than
                ``OK|<value>`` (for example an ``ERROR_*`` code).
            TimeoutError: The task was still not ready after ``MAX_POLLS``
                polls.
        """
        self.logger.info("solve_captcha()")
        # `params=` lets requests URL-encode the values; the page URL in
        # particular may contain "&" or "=".
        submitted = requests.get(
            self.SUBMIT_URL,
            params={
                "key": self.api_key,
                "method": "userrecaptcha",
                "googlekey": google_key,
                "pageurl": self.page.url,
            },
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        submitted.raise_for_status()
        self.logger.info(submitted.text)
        self.captcha_id = self._parse_reply(submitted.text)

        # Give the service a head start before the first poll.
        time.sleep(self.INITIAL_WAIT_SECONDS)

        for _ in range(self.MAX_POLLS):
            polled = requests.get(
                self.RESULT_URL,
                params={
                    "key": self.api_key,
                    "action": "get",
                    "id": self.captcha_id,
                },
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            polled.raise_for_status()
            reply = polled.text.strip()
            self.logger.info(reply)
            if reply != CAPCHA_NOT_READY:
                handle_solution_received(self._parse_reply(reply))
                return
            # Only wait when another poll is actually needed.
            time.sleep(self.POLL_INTERVAL_SECONDS)

        raise TimeoutError(
            "2captcha task {} not solved after {} polls".format(
                self.captcha_id, self.MAX_POLLS))

    @staticmethod
    def _parse_reply(reply):
        """Return the payload of an ``OK|<payload>`` reply, else raise RuntimeError."""
        text = reply.strip()
        status, separator, payload = text.partition("|")
        if status != "OK" or not separator or not payload:
            raise RuntimeError("2captcha request failed: {}".format(text))
        return payload