From 380398c76886de7a3775ff8ab2fff11435a1b6d7 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Mon, 23 May 2022 13:12:55 +0200 Subject: [PATCH] can resolve captcha with 2captcha --- workers/SolveCaptch.py | 107 ++++++++++++++------------------------ workers/commandor_page.py | 46 ++-------------- 2 files changed, 43 insertions(+), 110 deletions(-) diff --git a/workers/SolveCaptch.py b/workers/SolveCaptch.py index ed80168..9f0a470 100644 --- a/workers/SolveCaptch.py +++ b/workers/SolveCaptch.py @@ -1,82 +1,53 @@ -import random # for recaptcha -import urllib -from speech_recognition import Recognizer, AudioFile +import logging import random -import os +import re +import time + +import requests + +CAPCHA_NOT_READY = "CAPCHA_NOT_READY" +REGEX_DATA_SITE_KEY = "data-sitekey=[\"a-z0-9A-Z]+" class SolveCaptcha: def __init__(self, page): self.page = page + self.logger = logging.getLogger("SolveCaptcha") self.main_frame = None self.recaptcha = None def delay(self): self.page.wait_for_timeout(random.randint(1, 3) * 1000) - def presetup(self): - name = self.page.locator( - "//iframe[@title='reCAPTCHA']").get_attribute("name") - self.recaptcha = self.page.frame(name=name) + def start(self, handle_solution_received): + self.logger.info("start to resolve captcha") + content = self.page.content() + data_sitekey = re.findall(REGEX_DATA_SITE_KEY, content) + self.logger.info(data_sitekey) + if len(data_sitekey) == 1: + key_with_comma = data_sitekey[0].split("=")[-1] + key = key_with_comma.replace("\"", '') + print("key is : " + key) + self.solve_captcha(key, handle_solution_received) - self.recaptcha.click("//div[@class='recaptcha-checkbox-border']") - self.delay() - s = self.recaptcha.locator("//span[@id='recaptcha-anchor']") - if s.get_attribute("aria-checked") != "false": # solved already - return - - # self.main_frame = self.page.frame(name=self.page.locator( - # "//iframe[contains(@src,'https://www.google.com/recaptcha/api2/bframe?')]").get_attribute("name")) - # self.main_frame.click("id=recaptcha-audio-button") - - def start(self): - print("start to resolve captcha") - self.presetup() - tries = 0 - while (tries <= 5): - self.delay() - try: - self.solve_captcha() - except Exception as e: - print("exception:") - print(e) - self.main_frame.click("id=recaptcha-reload-button") - else: - s = self.recaptcha.locator("//span[@id='recaptcha-anchor']") - if s.get_attribute("aria-checked") != "false": - self.page.click("id=recaptcha-demo-submit") - self.delay() - break - tries += 1 - - def solve_captcha(self): - print("solve_captcha()") - # self.main_frame.click( - # "//button[@aria-labelledby='audio-instructions rc-response-label']") - # href = self.main_frame.locator( - # "//a[@class='rc-audiochallenge-tdownload-link']").get_attribute("href") - # - # print("retrieve mp3 file") - # urllib.request.urlretrieve(href, "audio.mp3") - # - # sound = pydub.AudioSegment.from_mp3( - # "audio.mp3").export("audio.wav", format="wav") - # - # recognizer = Recognizer() - # - # recaptcha_audio = AudioFile("audio.wav") - # with recaptcha_audio as source: - # audio = recognizer.record(source) - # print("recognizer mp3 file") - # - # text = recognizer.recognize_google(audio) - # print("recognized text: " + text) - # self.main_frame.fill("id=audio-response", text) - # self.main_frame.click("id=recaptcha-verify-button") - # self.delay() - - def __del__(self): - pass - # os.remove("audio.mp3") - # os.remove("audio.wav") + def solve_captcha(self, google_key: str, handle_solution_received): + self.logger.info("solve_captcha()") + url_get = "http://2captcha.com/in.php?key=e7e3cd0977aba2dab49e0ea052ca58b1&method=userrecaptcha&googlekey={}&pageurl={}".format( + google_key, self.page.url) + res = requests.get(url_get) + print(res.text) + results = res.text.split("|") + self.captcha_id = results[-1] + # wait for 15 seconds + time.sleep(15) + # get result of the captcha + url_response = "http://2captcha.com/res.php?key=e7e3cd0977aba2dab49e0ea052ca58b1&action=get&id={}".format( + self.captcha_id) + solution = CAPCHA_NOT_READY + while solution == CAPCHA_NOT_READY: + solution_res = requests.get(url_response) + time.sleep(5) + solution = solution_res.text + print(solution) + handle_solution_received(solution.split("|")[-1]) diff --git a/workers/commandor_page.py b/workers/commandor_page.py index 7d6f3e3..f2bbed4 100644 --- a/workers/commandor_page.py +++ b/workers/commandor_page.py @@ -6,7 +6,6 @@ import threading import time from typing import Union -import requests from playwright.sync_api import sync_playwright import params @@ -20,8 +19,6 @@ RDV_URL = "https://rendezvousparis.hermes.com/client/register" # RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" # RDV_URL = "https://api.ipify.org" # RDV_URL ="https://bot.sannysoft.com/" -REGEX_DATA_SITE_KEY = "data-sitekey=[\"a-z0-9A-Z]+" -CAPCHA_NOT_READY = "CAPCHA_NOT_READY" REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+" otp_value = None OTP_FIELD_ID = "#sms_code" @@ -96,7 +93,7 @@ class CommandorPage: } self.fill_fields() # wait the captha element - self.find_data_sitekey() + self.resolve_captcha() # wait for sms_code field # self.clickOnValidBtn() self.thread_event = e @@ -293,14 +290,6 @@ class CommandorPage: def clear_app_data(self): pass - def click_recapcha_checkbox(self): - captcha_solver = SolveCaptcha(self.page) - captcha_solver.start() - del captcha_solver - # checkbox = self.page.wait_for_selector( - # "#recaptcha-anchor") - # checkbox.click() - def fill_otp(self, otp: str): self.page.focus(OTP_FIELD_ID) time.sleep(get_random_wait_time()) @@ -323,36 +312,9 @@ class CommandorPage: time.sleep(2) self.browser.close() - def find_data_sitekey(self): - content = self.page.content() - data_sitekey = re.findall(REGEX_DATA_SITE_KEY, content) - self.logger.info(data_sitekey) - if len(data_sitekey) == 1: - key_with_comma = data_sitekey[0].split("=")[-1] - key = key_with_comma.replace("\"", '') - print("key is : " + key) - self.send_to_resolve_captcha(key) - - def send_to_resolve_captcha(self, google_key: str): - url_get = "http://2captcha.com/in.php?key=e7e3cd0977aba2dab49e0ea052ca58b1&method=userrecaptcha&googlekey={}&pageurl={}".format( - google_key, self.page.url) - res = requests.get(url_get) - print(res.text) - results = res.text.split("|") - self.captcha_id = results[-1] - # wait for 15 seconds - time.sleep(15) - # get result of the captcha - url_response = "http://2captcha.com/res.php?key=e7e3cd0977aba2dab49e0ea052ca58b1&action=get&id={}".format( - self.captcha_id) - solution = CAPCHA_NOT_READY - while solution == CAPCHA_NOT_READY: - solution_res = requests.get(url_response) - print(solution_res.text) - time.sleep(5) - solution = solution_res.text - print(solution) - self.fill_captcha_solution(solution.split("|")[-1]) + def resolve_captcha(self): + self.captcha_solver = SolveCaptcha(self.page) + self.captcha_solver.start(self.fill_captcha_solution) def fill_captcha_solution(self, solution): self.logger.info("will input solution: " + solution)