can resolve captcha with 2captcha
This commit is contained in:
+39
-68
@@ -1,82 +1,53 @@
|
||||
import random
|
||||
# for recaptcha
|
||||
import urllib
|
||||
from speech_recognition import Recognizer, AudioFile
|
||||
import logging
|
||||
import random
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
# Literal reply of 2captcha's res.php while a worker is still solving the
# captcha (the misspelling is part of the service's protocol).
CAPCHA_NOT_READY = "CAPCHA_NOT_READY"

# Captures the site key from a ``data-sitekey`` attribute.  reCAPTCHA site
# keys contain ``-`` and ``_`` in addition to alphanumerics, and the attribute
# value may be wrapped in double quotes, single quotes, or nothing.
REGEX_DATA_SITE_KEY = r"""data-sitekey=["']?([A-Za-z0-9_-]+)"""

# 2captcha endpoints; HTTPS so the API key is not sent in clear text.
_SUBMIT_URL = "https://2captcha.com/in.php"
_RESULT_URL = "https://2captcha.com/res.php"

# Environment variable consulted for the API key when none is passed in.
_API_KEY_ENV_VAR = "TWOCAPTCHA_API_KEY"
# NOTE(review): this secret is committed to source control.  It is kept only
# so existing deployments keep working; rotate it and supply the key through
# the ``api_key`` argument or the environment variable instead.
_FALLBACK_API_KEY = "e7e3cd0977aba2dab49e0ea052ca58b1"

# Upper bound, in seconds, for each individual HTTP call to 2captcha.
_HTTP_TIMEOUT_SECONDS = 30


class CaptchaSolveError(RuntimeError):
    """Raised when 2captcha reports an error or no solution arrives in time."""


class SolveCaptcha:
    """Solve the reCAPTCHA embedded in a browser page via the 2captcha API.

    ``page`` is expected to expose ``content()``, ``url``, ``locator()``,
    ``frame()`` and ``wait_for_timeout()`` (presumably a Playwright sync
    ``Page`` -- confirm against the caller).
    """

    def __init__(self, page, *, api_key=None, http_get=None,
                 initial_wait=15, poll_interval=5, max_polls=60):
        """Create a solver bound to ``page``.

        Args:
            page: Browser page holding the captcha.
            api_key: 2captcha API key.  Falls back to the
                ``TWOCAPTCHA_API_KEY`` environment variable, then to the
                legacy built-in key.
            http_get: Callable with the ``requests.get`` calling convention
                (``url, params=..., timeout=...``) returning an object with a
                ``text`` attribute.  Defaults to ``requests.get``.
            initial_wait: Seconds to wait after submitting the captcha before
                the first result poll.
            poll_interval: Seconds to wait between result polls.
            max_polls: Maximum number of result polls before giving up.
        """
        self.page = page
        self.logger = logging.getLogger("SolveCaptcha")
        self.main_frame = None
        self.recaptcha = None
        # Id assigned by 2captcha to the most recently submitted captcha.
        self.captcha_id = None
        self.api_key = api_key or os.environ.get(
            _API_KEY_ENV_VAR, _FALLBACK_API_KEY)
        self.initial_wait = initial_wait
        self.poll_interval = poll_interval
        self.max_polls = max_polls
        self._http_get = http_get

    def delay(self):
        """Pause the page for a random 1-3 seconds."""
        self.page.wait_for_timeout(random.randint(1, 3) * 1000)

    def presetup(self):
        """Locate the reCAPTCHA iframe and store its frame in ``self.recaptcha``."""
        name = self.page.locator(
            "//iframe[@title='reCAPTCHA']").get_attribute("name")
        self.recaptcha = self.page.frame(name=name)

    def start(self, handle_solution_received):
        """Find the page's site key and solve the captcha with 2captcha.

        Args:
            handle_solution_received: Callable invoked with the solution
                token (a string) once 2captcha returns it.

        Nothing is submitted unless the page contains exactly one distinct
        site key; that case is logged as a warning.

        Raises:
            CaptchaSolveError: Propagated from :meth:`solve_captcha`.
        """
        self.logger.info("start to resolve captcha")
        matches = re.findall(REGEX_DATA_SITE_KEY, self.page.content())
        # The same widget may be rendered more than once; identical keys
        # are collapsed while preserving first-seen order.
        site_keys = list(dict.fromkeys(matches))
        self.logger.info("site keys found: %s", site_keys)
        if len(site_keys) != 1:
            self.logger.warning(
                "expected exactly one reCAPTCHA site key, found %d; skipping",
                len(site_keys))
            return
        self.solve_captcha(site_keys[0], handle_solution_received)

    def solve_captcha(self, google_key: str, handle_solution_received):
        """Submit the captcha to 2captcha and poll until it is solved.

        Args:
            google_key: reCAPTCHA site key of the page.
            handle_solution_received: Callable invoked with the solution
                token once it is available.

        Raises:
            CaptchaSolveError: If 2captcha answers with an error, or no
                solution is available after ``max_polls`` polls.
        """
        self.logger.info("solve_captcha()")
        # Passing values as ``params`` lets the HTTP client URL-encode them;
        # the page URL routinely contains ``?`` and ``&``.
        submitted = self._call_api(_SUBMIT_URL, {
            "key": self.api_key,
            "method": "userrecaptcha",
            "googlekey": google_key,
            "pageurl": self.page.url,
        })
        self.captcha_id = self._payload_or_raise(submitted)

        # Give a worker time to pick the task up before the first poll.
        time.sleep(self.initial_wait)

        query = {"key": self.api_key, "action": "get", "id": self.captcha_id}
        for attempt in range(self.max_polls):
            if attempt:
                # Sleep only between polls, never after the final answer.
                time.sleep(self.poll_interval)
            reply = self._call_api(_RESULT_URL, query)
            if reply == CAPCHA_NOT_READY:
                continue
            handle_solution_received(self._payload_or_raise(reply))
            return
        raise CaptchaSolveError(
            "captcha %s was not solved after %d polls"
            % (self.captcha_id, self.max_polls))

    def _call_api(self, url, params):
        """GET a 2captcha endpoint and return its stripped response text."""
        http_get = self._http_get or requests.get
        response = http_get(url, params=params, timeout=_HTTP_TIMEOUT_SECONDS)
        text = response.text.strip()
        self.logger.info("2captcha replied: %s", text)
        return text

    @staticmethod
    def _payload_or_raise(text):
        """Return the payload of an ``OK|<payload>`` reply, else raise."""
        status, separator, payload = text.partition("|")
        if status != "OK" or not separator or not payload:
            raise CaptchaSolveError("2captcha returned an error: %s" % text)
        return payload
|
||||
|
||||
Reference in New Issue
Block a user