Can now automatically bypass the captcha

This commit is contained in:
2022-09-03 17:41:48 +02:00
parent b96ebf44ac
commit 597aa6a1f6
3 changed files with 56 additions and 42 deletions
+1 -1
View File
@@ -165,7 +165,7 @@ def read_mails():
if need_to_valid_url(url, successful_items): if need_to_valid_url(url, successful_items):
url_validator = LinkValidator(url) url_validator = LinkValidator(url)
print("need to validate url: " + url) print("need to validate url: " + url)
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), True) executor.submit(url_validator.start_page, params.get_proxy(ProxyType.RESIDENTIAL), False)
else: else:
print("do not need to click url --> {}".format(mail.mail_address)) print("do not need to click url --> {}".format(mail.mail_address))
+36 -26
View File
@@ -1,6 +1,6 @@
import re import logging
import os
import uuid import uuid
from uuid import UUID
import requests import requests
import speech_recognition as sr import speech_recognition as sr
@@ -11,17 +11,25 @@ from src.params import get_random_wait_time
WAV_FILE_REGEX = "https:[\/a-z0-9.-]+.wav" WAV_FILE_REGEX = "https:[\/a-z0-9.-]+.wav"
number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3, 'four': 4, number_text_dict = {'eight': 8, 'take': 8, 'zero': 0, 'one': 1, 'juan': 1, 'won': 1, 'to': 2, 'two': 2, 'three': 3,
'four': 4,
'for': 4, 'for': 4,
'five': 5, 'five': 5,
'six': 6, 'six': 6,
'seven': 7, 'nine': 9} 'seven': 7, 'i': 9, 'nine': 9}
class GeoCaptchaSolver: class GeoCaptchaSolver:
def __init__(self, page): def __init__(self, page):
self.page = page self.page = page
self.logger = logging.getLogger("GeoCaptchaSolver")
def delete_audio_file(self):
    """Remove the downloaded captcha audio file, best-effort.

    Reads the path from ``self.file_name``, which ``solve()`` assigns when it
    downloads the audio. A file that cannot be removed is reported but never
    raises, so cleanup cannot break the captcha flow.

    Returns:
        None
    """
    file_name = getattr(self, "file_name", None)
    if not file_name:
        # No audio file has been recorded on this instance, so there is
        # nothing to clean up.
        return
    try:
        os.remove(file_name)
    except OSError as error:
        # Print the caught error instance (errno, message, path); the
        # previous code printed the OSError class itself, which hid the
        # actual failure reason.
        print(error)
def solve(self): def solve(self):
print("solve() called.") print("solve() called.")
@@ -35,35 +43,36 @@ class GeoCaptchaSolver:
print("audio_tag " + audio_tag) print("audio_tag " + audio_tag)
wav_url = self.get_wav_src() wav_url = self.get_wav_src()
print("src " + wav_url) print("src " + wav_url)
# body = r.text
# print(body)
# match = re.search(WAV_FILE_REGEX, body)
# if match:
# wav_url = match.group(0)
# print("wav file " + wav_url)
response = requests.get(wav_url) response = requests.get(wav_url)
file_name = str(uuid.uuid4()) + wav_url.split("/")[-1] self.file_name = str(uuid.uuid4()) + wav_url.split("/")[-1]
wget.download(wav_url, file_name) wget.download(wav_url, self.file_name)
open(file_name, "wb").write(response.content) open(self.file_name, "wb").write(response.content)
r = sr.Recognizer() r = sr.Recognizer()
with sr.AudioFile(file_name) as source: with sr.AudioFile(self.file_name) as source:
audio = r.record(source) # read the entire audio file audio = r.record(source) # read the entire audio file
# recognize speech using Sphinx # recognize speech using Sphinx
try: try:
text = r.recognize_sphinx(audio) text = r.recognize_sphinx(audio)
number_list = text.split(" ")[-6:] number_list = text.split(" ")[-6:]
print(number_list) self.logger.info(str(number_list))
number_to_type = [] number_to_type = []
for number in number_list: for number in number_list:
number_to_type.append(number_text_dict[number]) number_to_type.append(number_text_dict[number])
print(number_to_type) print(number_to_type)
if len(number_to_type) == 6: if len(number_to_type) == 6:
self.input_numbers(number_to_type) self.input_numbers(number_to_type)
print("Sphinx thinks you said " + text) print("App thinks you said " + text)
return False
except sr.UnknownValueError: except sr.UnknownValueError:
print("Sphinx could not understand audio") print("App could not understand audio")
return True
except sr.RequestError as e: except sr.RequestError as e:
print("Sphinx error; {0}".format(e)) print("App error; {0}".format(e))
return True
except Exception as error:
print(error)
self.logger.info("error on the file_name:" + self.file_name)
return True
def input_numbers(self, number_to_type): def input_numbers(self, number_to_type):
# use javascript to input # use javascript to input
@@ -72,9 +81,9 @@ class GeoCaptchaSolver:
def get_wav_src(self): def get_wav_src(self):
src = self.iframe.evaluate("""()=>{ src = self.iframe.evaluate("""()=>{
return document.querySelector('.audio-captcha-track').src; return document.querySelector('.audio-captcha-track').src;
} }
""") """)
return src return src
def input_by_js(self, number_to_type): def input_by_js(self, number_to_type):
@@ -84,12 +93,13 @@ class GeoCaptchaSolver:
# ) # )
for number in number_to_type: for number in number_to_type:
(self.iframe).evaluate("""(info) =>{ (self.iframe).evaluate("""(info) =>{
let input_fileds = document.getElementsByClassName("audio-captcha-inputs"); let input_fileds = document.getElementsByClassName("audio-captcha-inputs");
input_fileds[info.index].focus(); input_fileds[info.index].focus();
input_fileds[info.index].value = info.number;} input_fileds[info.index].value = info.number;}
""", {'index': index, 'number': number}) """, {'index': index, 'number': number})
index = index + 1 index = index + 1
time.sleep(get_random_wait_time()) time.sleep(get_random_wait_time())
self.iframe.query_selector(".audio-captcha-verify-button").evaluate("""(btn)=>{ self.iframe.query_selector(".audio-captcha-verify-button").evaluate("""(btn)=>{
btn.click()};""") btn.click()};""")
self.delete_audio_file()
+19 -15
View File
@@ -21,11 +21,11 @@ from src.workers.GeoCaptchSolver import GeoCaptchaSolver
from src.workers.SolveCaptch import SolveCaptcha from src.workers.SolveCaptch import SolveCaptcha
from src.workers.TlsPlaywright import TlsPlaywright from src.workers.TlsPlaywright import TlsPlaywright
# RDV_URL = "https://rendezvousparis.hermes.com/client/register" RDV_URL = "https://rendezvousparis.hermes.com/client/register"
# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" # RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html"
# RDV_URL = "https://api.ipify.org" # RDV_URL = "https://api.ipify.org"
RDV_URL = "https://bot.sannysoft.com/" # RDV_URL = "https://bot.sannysoft.com/"
REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+" REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
BLOCKED_MESSAGE_FR = "Pourquoi ce blocage" BLOCKED_MESSAGE_FR = "Pourquoi ce blocage"
BLOCKED_MESSAGE_EN = "You have been blocked" BLOCKED_MESSAGE_EN = "You have been blocked"
@@ -95,15 +95,15 @@ class CommandorPage:
self.on_success_listener = on_success self.on_success_listener = on_success
# reset otp_value to None # reset otp_value to None
self.otp_value = None self.otp_value = None
# ios_devices_keys = self.tls.playwright.devices.keys() # device = None
# ios_keys = [] device_key = random.choice(params.DEVICES)
# for key in ios_devices_keys:
# if "iPhone" in key or "iPad" in key:
# ios_keys.append(key)
# print(ios_keys)
# devices = random.choice(params.DEVICES)
device_key = random.sample(list(self.tls.playwright.devices), 1)[0]
device = self.tls.playwright.devices[device_key] device = self.tls.playwright.devices[device_key]
# while device is None:
# device_key = random.sample(list(self.tls.playwright.devices), 1)[0]
# device = self.tls.playwright.devices[device_key]
# if "iPad" not in device['user_agent'] or "iPhone" not in device['user_agent']:
# device = None
first_page = None first_page = None
while first_page is None: while first_page is None:
first_page = self.start_browser(proxy, self.tls.playwright, device, use_proxy=use_proxy) first_page = self.start_browser(proxy, self.tls.playwright, device, use_proxy=use_proxy)
@@ -137,7 +137,7 @@ class CommandorPage:
else: else:
self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT) self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
userAgent = device['user_agent'] userAgent = device['user_agent']
print("user_agent is " + userAgent) self.logger.info("user_agent is " + userAgent)
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR') # context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
context = self.browser.new_context(**device) context = self.browser.new_context(**device)
else: else:
@@ -146,6 +146,7 @@ class CommandorPage:
else: else:
self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT) self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
userAgent = device['user_agent'] userAgent = device['user_agent']
self.logger.info("user_agent is " + userAgent)
context = self.browser.new_context(user_agent=userAgent) context = self.browser.new_context(user_agent=userAgent)
self.current_context = context self.current_context = context
self.create_and_config_page(context) self.create_and_config_page(context)
@@ -182,7 +183,7 @@ class CommandorPage:
def start_page(self, proxy, use_proxy=True): def start_page(self, proxy, use_proxy=True):
self._run(proxy, use_proxy) self._run(proxy, use_proxy)
def solve_datadome_captcha(self): def solve_datadome_captcha(self) -> bool:
print("solve_datadome_captcha") print("solve_datadome_captcha")
time.sleep(2) time.sleep(2)
success = self._click_audio_btn() success = self._click_audio_btn()
@@ -190,9 +191,9 @@ class CommandorPage:
time.sleep(16) time.sleep(16)
time.sleep(get_random_wait_time()) time.sleep(get_random_wait_time())
solver = GeoCaptchaSolver(self.page) solver = GeoCaptchaSolver(self.page)
solver.solve() return solver.solve()
else: else:
self.termine() return True
def _on_page_loaded(self): def _on_page_loaded(self):
# time.sleep(40000) # time.sleep(40000)
@@ -215,7 +216,8 @@ class CommandorPage:
# print("we are blocked") # print("we are blocked")
# # self.termine() # # self.termine()
else: else:
self.solve_datadome_captcha() if self.solve_datadome_captcha():
self.termine()
def on_document_loaded(self): def on_document_loaded(self):
self.logger.info("on_document_loaded called") self.logger.info("on_document_loaded called")
@@ -370,6 +372,8 @@ class CommandorPage:
def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str):
# create the message # create the message
if url == "https://rendezvousparis.hermes.com/client/welcome":
return
id = url.split("/")[-1] id = url.split("/")[-1]
result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url, result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url,
firstName=contact.first_name, lastName=contact.last_name, email=contact.mail, firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,