Can automatically bypass the captcha

2022-09-03 17:41:48 +02:00
parent b96ebf44ac
commit 597aa6a1f6
3 changed files with 56 additions and 42 deletions
@@ -165,7 +165,7 @@ def read_mails():
                if need_to_valid_url(url, successful_items):
                    url_validator = LinkValidator(url)
                    print("need to validate url: " + url)
-                    executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), True)
+                    executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), False)
                else:
                    print("do not need to click url --> {}".format(mail.mail_address))
@@ -1,6 +1,6 @@
-import re
+import logging
 import os
 import uuid
 from uuid import UUID
 import requests
 import speech_recognition as sr
@@ -11,17 +11,25 @@ from src.params import get_random_wait_time
 WAV_FILE_REGEX = "https:[\/a-z0-9.-]+.wav"
-number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3, 'four': 4,
+number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'juan': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3,
                    'four': 4,
                    'for': 4,
                    'five': 5,
                    'six': 6,
-                    'seven': 7, 'nine': 9}
+                    'seven': 7, 'i': 9, 'nine': 9}
 class GeoCaptchaSolver:
    def __init__(self, page):
        self.page = page
        self.logger = logging.getLogger("GeoCaptchaSolver")
    def delete_audio_file(self):
        """Delete the downloaded audio file named by ``self.file_name``.

        Removal is best-effort: nothing is raised when no file name has been
        recorded on the instance yet, or when the file cannot be removed. A
        failed removal is reported on stdout.
        """
        file_name = getattr(self, "file_name", None)
        if file_name is None:
            # __init__ does not set file_name, so there may be nothing to delete.
            return
        try:
            os.remove(file_name)
        except OSError as error:
            # Print the caught error itself; printing the OSError class said
            # nothing about which file failed or why.
            print(error)
    def solve(self):
        print("solve() called.")
@@ -35,35 +43,36 @@ class GeoCaptchaSolver:
        print("audio_tag " + audio_tag)
        wav_url = self.get_wav_src()
        print("src " + wav_url)
        # body = r.text
        # print(body)
        # match = re.search(WAV_FILE_REGEX, body)
        # if match:
        # wav_url = match.group(0)
        # print("wav file " + wav_url)
        response = requests.get(wav_url)
-        file_name = str(uuid.uuid4()) + wav_url.split("/")[-1]
+        self.file_name = str(uuid.uuid4()) + wav_url.split("/")[-1]
-        wget.download(wav_url, file_name)
+        wget.download(wav_url, self.file_name)
-        open(file_name, "wb").write(response.content)
+        open(self.file_name, "wb").write(response.content)
        r = sr.Recognizer()
-        with sr.AudioFile(file_name) as source:
+        with sr.AudioFile(self.file_name) as source:
            audio = r.record(source)  # read the entire audio file
        # recognize speech using Sphinx
        try:
            text = r.recognize_sphinx(audio)
            number_list = text.split(" ")[-6:]
-            print(number_list)
+            self.logger.info(str(number_list))
            number_to_type = []
            for number in number_list:
                number_to_type.append(number_text_dict[number])
            print(number_to_type)
            if len(number_to_type) == 6:
                self.input_numbers(number_to_type)
-            print("Sphinx thinks you said " + text)
+            print("App thinks you said " + text)
            return False
        except sr.UnknownValueError:
-            print("Sphinx could not understand audio")
+            print("App could not understand audio")
            return True
        except sr.RequestError as e:
-            print("Sphinx error; {0}".format(e))
+            print("App error; {0}".format(e))
            return True
        except  Exception as error:
            print(error)
            self.logger.info("error on the file_name:" + self.file_name)
            return True
    def input_numbers(self, number_to_type):
        # use javascript to input
@@ -72,9 +81,9 @@ class GeoCaptchaSolver:
    def get_wav_src(self):
        src = self.iframe.evaluate("""()=>{
-        return document.querySelector('.audio-captcha-track').src;
+            return document.querySelector('.audio-captcha-track').src;
-        }
+            }
-        """)
+            """)
        return src
    def input_by_js(self, number_to_type):
@@ -84,12 +93,13 @@ class GeoCaptchaSolver:
        # )
        for number in number_to_type:
            (self.iframe).evaluate("""(info) =>{
-            let input_fileds = document.getElementsByClassName("audio-captcha-inputs");
+                let input_fileds = document.getElementsByClassName("audio-captcha-inputs");
-            input_fileds[info.index].focus();
+                input_fileds[info.index].focus();
-            input_fileds[info.index].value = info.number;}
+                input_fileds[info.index].value = info.number;}
-            """, {'index': index, 'number': number})
+                """, {'index': index, 'number': number})
            index = index + 1
            time.sleep(get_random_wait_time())
        self.iframe.query_selector(".audio-captcha-verify-button").evaluate("""(btn)=>{
-        btn.click()};""")
+            btn.click()};""")
        self.delete_audio_file()
@@ -21,11 +21,11 @@ from src.workers.GeoCaptchSolver import GeoCaptchaSolver
 from src.workers.SolveCaptch import SolveCaptcha
 from src.workers.TlsPlaywright import TlsPlaywright
-# RDV_URL = "https://rendezvousparis.hermes.com/client/register"
+RDV_URL = "https://rendezvousparis.hermes.com/client/register"
 # RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html"
 # RDV_URL = "https://api.ipify.org"
-RDV_URL = "https://bot.sannysoft.com/"
+# RDV_URL = "https://bot.sannysoft.com/"
 REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
 BLOCKED_MESSAGE_FR = "Pourquoi ce blocage"
 BLOCKED_MESSAGE_EN = "You have been blocked"
@@ -95,15 +95,15 @@ class CommandorPage:
        self.on_success_listener = on_success
        # reset otp_value to None
        self.otp_value = None
-        # ios_devices_keys = self.tls.playwright.devices.keys()
+        # device = None
-        # ios_keys = []
+        device_key = random.choice(params.DEVICES)
        # for key in ios_devices_keys:
        #     if "iPhone" in key or "iPad" in key:
        #         ios_keys.append(key)
        # print(ios_keys)
        # devices = random.choice(params.DEVICES)
        device_key = random.sample(list(self.tls.playwright.devices), 1)[0]
        device = self.tls.playwright.devices[device_key]
        # while device is None:
        #     device_key = random.sample(list(self.tls.playwright.devices), 1)[0]
        #     device = self.tls.playwright.devices[device_key]
        #     if "iPad" not in device['user_agent'] or "iPhone" not in device['user_agent']:
        #         device = None
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, device, use_proxy=use_proxy)
@@ -137,7 +137,7 @@ class CommandorPage:
                else:
                    self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
                userAgent = device['user_agent']
-                print("user_agent is " + userAgent)
+                self.logger.info("user_agent is " + userAgent)
                # context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
                context = self.browser.new_context(**device)
            else:
@@ -146,6 +146,7 @@ class CommandorPage:
                else:
                    self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
                userAgent = device['user_agent']
                self.logger.info("user_agent is " + userAgent)
                context = self.browser.new_context(user_agent=userAgent)
            self.current_context = context
            self.create_and_config_page(context)
@@ -182,7 +183,7 @@ class CommandorPage:
    def start_page(self, proxy, use_proxy=True):
        """Start the run by handing both arguments straight to ``self._run``.

        Args:
            proxy: Forwarded unchanged as the first argument of ``_run``.
            use_proxy: Forwarded unchanged as the second argument of
                ``_run``; defaults to True.
        """
        self._run(proxy, use_proxy)
-    def solve_datadome_captcha(self):
+    def solve_datadome_captcha(self) -> bool:
        print("solve_datadome_captcha")
        time.sleep(2)
        success = self._click_audio_btn()
@@ -190,9 +191,9 @@ class CommandorPage:
            time.sleep(16)
            time.sleep(get_random_wait_time())
            solver = GeoCaptchaSolver(self.page)
-            solver.solve()
+            return solver.solve()
        else:
-            self.termine()
+            return True
    def _on_page_loaded(self):
        # time.sleep(40000)
@@ -215,7 +216,8 @@ class CommandorPage:
        #         print("we are blocked")
        #         # self.termine()
        else:
-            self.solve_datadome_captcha()
+            if self.solve_datadome_captcha():
                self.termine()
    def on_document_loaded(self):
        self.logger.info("on_document_loaded called")
@@ -370,6 +372,8 @@ class CommandorPage:
    def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str):
        # create the message
        if url == "https://rendezvousparis.hermes.com/client/welcome":
            return
        id = url.split("/")[-1]
        result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url,
                                   firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,