diff --git a/src/mail/mail_reader.py b/src/mail/mail_reader.py index f86834d..7e84efd 100644 --- a/src/mail/mail_reader.py +++ b/src/mail/mail_reader.py @@ -165,7 +165,7 @@ def read_mails(): if need_to_valid_url(url, successful_items): url_validator = LinkValidator(url) print("need to validate url: " + url) - executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), True) + executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), False) else: print("do not need to click url --> {}".format(mail.mail_address)) diff --git a/src/workers/GeoCaptchSolver.py b/src/workers/GeoCaptchSolver.py index 4553bbf..891dc08 100644 --- a/src/workers/GeoCaptchSolver.py +++ b/src/workers/GeoCaptchSolver.py @@ -1,6 +1,6 @@ -import re +import logging +import os import uuid -from uuid import UUID import requests import speech_recognition as sr @@ -11,17 +11,25 @@ from src.params import get_random_wait_time WAV_FILE_REGEX = "https:[\/a-z0-9.-]+.wav" -number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3, 'four': 4, +number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'juan': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3, + 'four': 4, 'for': 4, 'five': 5, 'six': 6, - 'seven': 7, 'nine': 9} + 'seven': 7, 'i': 9, 'nine': 9} class GeoCaptchaSolver: def __init__(self, page): self.page = page + self.logger = logging.getLogger("GeoCaptchaSolver") + + def delete_audio_file(self): + try: + os.remove(self.file_name) + except OSError: + print(OSError) def solve(self): print("solve() called.") @@ -35,35 +43,36 @@ class GeoCaptchaSolver: print("audio_tag " + audio_tag) wav_url = self.get_wav_src() print("src " + wav_url) - # body = r.text - # print(body) - # match = re.search(WAV_FILE_REGEX, body) - # if match: - # wav_url = match.group(0) - # print("wav file " + wav_url) response = requests.get(wav_url) - file_name = str(uuid.uuid4()) + wav_url.split("/")[-1] - wget.download(wav_url, file_name) - open(file_name, "wb").write(response.content) + self.file_name = str(uuid.uuid4()) + wav_url.split("/")[-1] + wget.download(wav_url, self.file_name) + open(self.file_name, "wb").write(response.content) r = sr.Recognizer() - with sr.AudioFile(file_name) as source: + with sr.AudioFile(self.file_name) as source: audio = r.record(source) # read the entire audio file # recognize speech using Sphinx try: text = r.recognize_sphinx(audio) number_list = text.split(" ")[-6:] - print(number_list) + self.logger.info(str(number_list)) number_to_type = [] for number in number_list: number_to_type.append(number_text_dict[number]) print(number_to_type) if len(number_to_type) == 6: self.input_numbers(number_to_type) - print("Sphinx thinks you said " + text) + print("App thinks you said " + text) + return False except sr.UnknownValueError: - print("Sphinx could not understand audio") + print("App could not understand audio") + return True except sr.RequestError as e: - print("Sphinx error; {0}".format(e)) + print("App error; {0}".format(e)) + return True + except Exception as error: + print(error) + self.logger.info("error on the file_name:" + self.file_name) + return True def input_numbers(self, number_to_type): # use javascript to input @@ -72,9 +81,9 @@ class GeoCaptchaSolver: def get_wav_src(self): src = self.iframe.evaluate("""()=>{ - return document.querySelector('.audio-captcha-track').src; - } - """) + return document.querySelector('.audio-captcha-track').src; + } + """) return src def input_by_js(self, number_to_type): @@ -84,12 +93,13 @@ class GeoCaptchaSolver: # ) for number in number_to_type: (self.iframe).evaluate("""(info) =>{ - let input_fileds = document.getElementsByClassName("audio-captcha-inputs"); - input_fileds[info.index].focus(); - input_fileds[info.index].value = info.number;} - """, {'index': index, 'number': number}) + let input_fileds = document.getElementsByClassName("audio-captcha-inputs"); + input_fileds[info.index].focus(); + input_fileds[info.index].value = info.number;} + """, {'index': index, 'number': number}) index = index + 1 time.sleep(get_random_wait_time()) self.iframe.query_selector(".audio-captcha-verify-button").evaluate("""(btn)=>{ - btn.click()};""") + btn.click()};""") + self.delete_audio_file() diff --git a/src/workers/commandor_page.py b/src/workers/commandor_page.py index 040fc36..97f7dc1 100644 --- a/src/workers/commandor_page.py +++ b/src/workers/commandor_page.py @@ -21,11 +21,11 @@ from src.workers.GeoCaptchSolver import GeoCaptchaSolver from src.workers.SolveCaptch import SolveCaptcha from src.workers.TlsPlaywright import TlsPlaywright -# RDV_URL = "https://rendezvousparis.hermes.com/client/register" +RDV_URL = "https://rendezvousparis.hermes.com/client/register" # RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" # RDV_URL = "https://api.ipify.org" -RDV_URL = "https://bot.sannysoft.com/" +# RDV_URL = "https://bot.sannysoft.com/" REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+" BLOCKED_MESSAGE_FR = "Pourquoi ce blocage" BLOCKED_MESSAGE_EN = "You have been blocked" @@ -95,15 +95,15 @@ class CommandorPage: self.on_success_listener = on_success # reset otp_value to None self.otp_value = None - # ios_devices_keys = self.tls.playwright.devices.keys() - # ios_keys = [] - # for key in ios_devices_keys: - # if "iPhone" in key or "iPad" in key: - # ios_keys.append(key) - # print(ios_keys) - # devices = random.choice(params.DEVICES) - device_key = random.sample(list(self.tls.playwright.devices), 1)[0] + # device = None + device_key = random.choice(params.DEVICES) device = self.tls.playwright.devices[device_key] + # while device is None: + # device_key = random.sample(list(self.tls.playwright.devices), 1)[0] + # device = self.tls.playwright.devices[device_key] + # if "iPad" not in device['user_agent'] or "iPhone" not in device['user_agent']: + # device = None + first_page = None while first_page is None: first_page = self.start_browser(proxy, self.tls.playwright, device, use_proxy=use_proxy) @@ -137,7 +137,7 @@ class CommandorPage: else: self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT) userAgent = device['user_agent'] - print("user_agent is " + userAgent) + self.logger.info("user_agent is " + userAgent) # context = self.browser.new_context(**simulated_mobile, locale='fr-FR') context = self.browser.new_context(**device) else: @@ -146,6 +146,7 @@ class CommandorPage: else: self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT) userAgent = device['user_agent'] + self.logger.info("user_agent is " + userAgent) context = self.browser.new_context(user_agent=userAgent) self.current_context = context self.create_and_config_page(context) @@ -182,7 +183,7 @@ class CommandorPage: def start_page(self, proxy, use_proxy=True): self._run(proxy, use_proxy) - def solve_datadome_captcha(self): + def solve_datadome_captcha(self) -> bool: print("solve_datadome_captcha") time.sleep(2) success = self._click_audio_btn() @@ -190,9 +191,9 @@ class CommandorPage: time.sleep(16) time.sleep(get_random_wait_time()) solver = GeoCaptchaSolver(self.page) - solver.solve() + return solver.solve() else: - self.termine() + return True def _on_page_loaded(self): # time.sleep(40000) @@ -215,7 +216,8 @@ class CommandorPage: # print("we are blocked") # # self.termine() else: - self.solve_datadome_captcha() + if self.solve_datadome_captcha(): + self.termine() def on_document_loaded(self): self.logger.info("on_document_loaded called") @@ -370,6 +372,8 @@ class CommandorPage: def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): # create the message + if url == "https://rendezvousparis.hermes.com/client/welcome": + return id = url.split("/")[-1] result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url, firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,