Merge branch 'feature/pb_datadome' into feature/anti-cap

This commit is contained in:
2022-09-02 17:06:14 +02:00
8 changed files with 488 additions and 681 deletions
+51 -38
View File
@@ -6,7 +6,9 @@ import wget
# Absolute URL of a .wav audio track (lower-case host/path characters only).
# Raw string so the pattern contains no invalid escape sequences, and the dot
# in front of "wav" is escaped so it only matches a literal ".wav" suffix.
WAV_FILE_REGEX = r"https:[/a-z0-9.-]+\.wav"

# Spoken word -> digit lookup applied to the speech-recognition transcript.
# 'won', 'to' and 'for' are homophones the recogniser may return instead of
# 'one', 'two' and 'four'.
number_text_dict = {
    'zero': 0,
    'one': 1, 'won': 1,
    'two': 2, 'to': 2,
    'three': 3,
    'four': 4, 'for': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9,
}
@@ -22,54 +24,65 @@ class GeoCaptchaSolver:
self.iframe = iframe
print(type(iframe))
print("url is " + iframe.url)
r = requests.get(iframe.url)
body = r.text
# r = requests.get(iframe.url)
audio_tag = iframe.query_selector('.audio-captcha-track').inner_html()
print("audio_tag " + audio_tag)
wav_url = self.get_wav_src()
print("src " + wav_url)
# body = r.text
# print(body)
match = re.search(WAV_FILE_REGEX, body)
if match:
wav_url = match.group(0)
print("wav file " + wav_url)
response = requests.get(wav_url)
file_name = wav_url.split("/")[-1]
wget.download(wav_url, file_name)
open(file_name, "wb").write(response.content)
r = sr.Recognizer()
with sr.AudioFile(file_name) as source:
audio = r.record(source) # read the entire audio file
# recognize speech using Sphinx
try:
text = r.recognize_sphinx(audio)
number_list = text.split(" ")[-6:]
print(number_list)
number_to_type = []
for number in number_list:
number_to_type.append(number_text_dict[number])
print(number_to_type)
if len(number_to_type) == 6:
self.input_numbers(number_to_type)
print("Sphinx thinks you said " + text)
except sr.UnknownValueError:
print("Sphinx could not understand audio")
except sr.RequestError as e:
print("Sphinx error; {0}".format(e))
else:
print("wav file not found")
# match = re.search(WAV_FILE_REGEX, body)
# if match:
# wav_url = match.group(0)
# print("wav file " + wav_url)
response = requests.get(wav_url)
file_name = wav_url.split("/")[-1]
wget.download(wav_url, file_name)
open(file_name, "wb").write(response.content)
r = sr.Recognizer()
with sr.AudioFile(file_name) as source:
audio = r.record(source) # read the entire audio file
# recognize speech using Sphinx
try:
text = r.recognize_sphinx(audio)
number_list = text.split(" ")[-6:]
print(number_list)
number_to_type = []
for number in number_list:
number_to_type.append(number_text_dict[number])
print(number_to_type)
if len(number_to_type) == 6:
self.input_numbers(number_to_type)
print("Sphinx thinks you said " + text)
except sr.UnknownValueError:
print("Sphinx could not understand audio")
except sr.RequestError as e:
print("Sphinx error; {0}".format(e))
def input_numbers(self, number_to_type):
    """Enter the recognised captcha digits into the form.

    The digits are typed through JavaScript: this method only forwards
    ``number_to_type`` to ``input_by_js``.
    """
    digits = number_to_type
    self.input_by_js(digits)
# download file
def get_wav_src(self):
    """Return the ``src`` of the ``.audio-captcha-track`` element.

    The lookup runs as JavaScript inside ``self.iframe``; whatever
    ``evaluate`` returns is handed back unchanged.
    """
    # Same script text as a triple-quoted literal, spelled line by line.
    read_src_script = (
        "()=>{\n"
        "return document.querySelector('.audio-captcha-track').src;\n"
        "}\n"
    )
    return self.iframe.evaluate(read_src_script)
def input_by_js(self, number_to_type):
    """Write each digit into the audio-captcha inputs, then press verify.

    The digit at position ``i`` of ``number_to_type`` is written into the
    ``i``-th element carrying the ``audio-captcha-inputs`` class. All
    JavaScript runs inside ``self.iframe``, the frame whose document is
    queried for the inputs and the verify button.

    Args:
        number_to_type: Iterable of digits, in the order they must be typed.
    """
    fill_script = """(info) =>{
        let input_fileds = document.getElementsByClassName("audio-captcha-inputs");
        console.log(input_fileds)
        input_fileds[info.index].focus();
        input_fileds[info.index].value = info.number;}
    """
    # enumerate() starts at 0, so the first digit lands in the first input.
    # Incrementing a counter before using it would skip input 0 and index
    # past the last field.
    for index, number in enumerate(number_to_type):
        self.iframe.evaluate(fill_script, {'index': index, 'number': number})
    self.iframe.query_selector(".audio-captcha-verify-button").evaluate("""(btn)=>{
        btn.click()};""")
+40 -47
View File
@@ -2,14 +2,14 @@ import datetime
import logging
import random
import re
import requests
import sys
import threading
import time
import traceback
from typing import Union
from playwright_stealth import stealth_sync
from src import params, definitions
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ModeEnum import ModeEnum
@@ -26,6 +26,8 @@ RDV_URL = "https://rendezvousparis.hermes.com/client/register"
# Alternative targets kept for manual checks (IP echo / bot-detection page).
# RDV_URL = "https://api.ipify.org"
# RDV_URL ="https://bot.sannysoft.com/"

# Registration URL followed by an upper-case alphanumeric token.
# Raw string: the pattern is unchanged, but it no longer relies on Python
# passing invalid escape sequences ("\/", "\.") through untouched.
REGEX_RDV_URL = r"https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"

# Sentences searched for in a page body to detect a blocked session.
BLOCKED_MESSAGE_FR = "Pourquoi ce blocage"
BLOCKED_MESSAGE_EN = "You have been blocked"

# Module-level OTP holder, initially empty.
otp_value = None
# CSS selectors used on the registration page.
OTP_FIELD_ID = "#sms_code"
MESSAGE_FIELD_CLASS = ".message"
@@ -53,7 +55,7 @@ def get_random_wait_time() -> float:
class CommandorPage:
tls = TlsPlaywright()
def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA,
def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.RESIDENTIAL,
mode: ModeEnum = ModeEnum.MANUAL, headless=False):
self.otp_value = None
self.is_finished = False
@@ -97,6 +99,12 @@ class CommandorPage:
self.on_success_listener = on_success
# reset otp_value to None
self.otp_value = None
# ios_devices_keys = self.tls.playwright.devices.keys()
# ios_keys = []
# for key in ios_devices_keys:
# if "iPhone" in key or "iPad" in key:
# ios_keys.append(key)
# print(ios_keys)
devices = random.choice(params.DEVICES)
first_page = None
while first_page is None:
@@ -145,7 +153,6 @@ class CommandorPage:
def create_and_config_page(self, context):
self.page = context.new_page()
stealth_sync(self.page)
# hide webdriver information
self.page.add_init_script("""() => {
Object.defineProperty(navigator,'webdriver',{get: () => undefined});
@@ -171,36 +178,11 @@ class CommandorPage:
def solve_datadome_captcha(self):
    """Run the audio-captcha solver against ``self.page``.

    Statements execute in the order written: a ``GeoCaptchaSolver`` is built
    for the page, the audio button is clicked through ``_click_audio_btn``,
    the thread sleeps 20 seconds, a second ``GeoCaptchaSolver`` is built and
    its ``solve()`` method is called.
    """
    print("solve_datadome_captcha")
    # NOTE(review): this solver instance is replaced below before it is used;
    # confirm whether the early construction is still needed.
    solver = GeoCaptchaSolver(self.page)
    # Disabled variant of the click step with shorter pauses:
    # time.sleep(2)
    # self._click_audio_btn()
    # time.sleep(1)
    self._click_audio_btn()
    # Fixed 20 s pause before solving -- presumably lets the audio challenge
    # load; TODO confirm and replace with an explicit wait.
    time.sleep(20)
    solver = GeoCaptchaSolver(self.page)
    solver.solve()
    # NOTE(review): a commented-out attempt that delegated the challenge to an
    # external anti-captcha task service (and then injected the returned
    # 'datadome' cookie and fingerprint into the page) used to follow here.
    # It embedded a literal API key; that key must be considered exposed and
    # should be rotated, and it must not be re-added to the source.
def _on_page_loaded(self):
# time.sleep(40000)
@@ -218,6 +200,10 @@ class CommandorPage:
self.get_errors()
except Exception as error:
self.logger.error(error)
# else:
# if self.check_is_blocked():
# print("we are blocked")
# # self.termine()
else:
self.solve_datadome_captcha()
@@ -264,10 +250,15 @@ class CommandorPage:
def _click_audio_btn(self):
    """Switch the captcha to its audio challenge and start playback.

    After a random pause the first ``iframe`` element of ``self.page`` is
    resolved to its frame. The ``#captcha__audio__button`` element is clicked
    there and, two seconds later, the ``.audio-captcha-play-button`` element
    is clicked through JavaScript.
    """
    time.sleep(get_random_wait_time())
    # The buttons are looked up inside the iframe. A document-level
    # getElementById('captcha__audio__button') on the top page would throw
    # whenever the element only exists in the frame, so only the frame-based
    # click is kept.
    iframe = self.page.query_selector('iframe').content_frame()
    iframe.query_selector('#captcha__audio__button').click()
    time.sleep(2)
    iframe.query_selector('.audio-captcha-play-button').evaluate("""(btn)=>{btn.click()}""")
def get_errors(self):
# send error result
@@ -406,16 +397,18 @@ class CommandorPage:
MONGO_STORE_MANAGER.insert_captcha_error_contact(self.contact)
params.oracle_log_sender.send_captcha_error(self.contact)
def config_page_with_fingerprint(self, fingerprint):
    """Register an init script that masks automation hints on ``self.page``.

    The script makes ``navigator.webdriver`` read as ``undefined`` and
    ``navigator.platform`` read as "iPhone". ``fingerprint`` is accepted but
    not used by this implementation.
    """
    # Same script text as a triple-quoted literal, spelled line by line.
    script_lines = (
        "() => {",
        "Object.defineProperty(navigator,'webdriver',{get: () => undefined});",
        "Object.defineProperty(navigator, 'platform', {",
        "get: () => {",
        'return "iPhone";',
        "}});",
        "}",
    )
    init_script = "\n".join(script_lines) + "\n"
    self.page.add_init_script(init_script)
def check_is_blocked(self) -> bool:
    """Tell whether the captcha frame reports that the session is blocked.

    The first ``iframe`` element of ``self.page`` is resolved to its frame
    (also stored on ``self.iframe``). The frame URL is then downloaded with
    ``requests`` and the response text is searched for
    ``BLOCKED_MESSAGE_FR`` / ``BLOCKED_MESSAGE_EN``.

    Returns:
        True when one of the blocked messages is present; False otherwise,
        including when the page has no iframe to inspect.

    Raises:
        requests.RequestException: if the frame URL cannot be downloaded
            (including after the 30 second timeout).
    """
    iframe_element = self.page.query_selector('iframe')
    iframe = iframe_element.content_frame() if iframe_element is not None else None
    if iframe is None:
        # No frame means there is no blocked message to find.
        return False
    self.iframe = iframe
    print(type(iframe))
    print("url is " + iframe.url)
    # NOTE(review): this is a plain HTTP request issued outside the browser,
    # so it does not go through the page itself -- confirm that the server
    # returns the same body in that case.
    # The timeout keeps a stalled server from hanging the caller forever.
    body = requests.get(iframe.url, timeout=30).text
    return BLOCKED_MESSAGE_FR in body or BLOCKED_MESSAGE_EN in body
def on_success(result: ReserveResultPojo):
@@ -427,7 +420,7 @@ def launch_page():
first_name="xingzhen",
mail="ColbyPatel653@gmail.com", ccid="", position=0)
page = CommandorPage(contact, store_type=1)
return page.start_page(params.get_proxy(ProxyType.BRIGHT_DATA))
return page.start_page(params.get_proxy(ProxyType.RESIDENTIAL))
def wait_for_otp(event: threading.Event, commandor: CommandorPage):
+135 -135
View File
@@ -1,135 +1,135 @@
import logging
import random
import traceback
from typing import Union
import sys
import time
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ReserveResultPojo import PublishType
from src.proxy.proxy_type import ProxyType
from src.workers.TlsPlaywright import TlsPlaywright
# CSS selector of the SMS code input.
OTP_FIELD_ID = "#sms_code"

# 10 minutes, expressed in milliseconds.
TIME_OUT = 1000 * 60 * 10
# Timeout handed to the browser launch and page navigation calls
# (presumably milliseconds as well -- confirm against the Playwright API).
PAGE_TIMEOUT = 40_000

# Sentences looked for in the loaded page to classify the outcome.
CONFIRMED_MESSAGE_FR = (
    "Votre demande de rendez-vous Maroquinerie a bien été enregistrée "
    "et nous vous en remercions."
)
SORRY_SENTENCE_FR = (
    "nous sommes sincèrement désolés de n'avoir pu vous satisfaire "
    "cette fois-ci"
)
DOUBLE_REQUEST_ERROR_MESSAGE_FR = (
    "Une demande avec les données saisies a déjà été validée "
    "aujourdhui."
)
class LinkValidator:
    """Open a validation link in a simulated mobile WebKit browser.

    The link is loaded through a proxy. Every time a page finishes loading,
    its HTML is searched for the known French result sentences and the
    matching state is stored through ``MONGO_STORE_MANAGER``.
    """

    # Playwright holder shared by all instances (created at class definition).
    tls = TlsPlaywright()

    def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False):
        """Remember the link to validate and how the browser is started.

        Args:
            link: URL opened by ``start_browser``.
            proxy_type: Passed to ``params.get_proxy`` when a retry needs a
                fresh proxy.
            headless: Browser launch flag. Note that ``start_page``
                overwrites it with its own ``headless`` argument.
        """
        self.is_finished = False
        self.link = link
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.logger = logging.getLogger("LinkValidator")
        # Defined up front so shutdown paths can run before any launch.
        self.browser = None
        self.page = None

    def on_success(self):
        """Mark the validation as finished and send the result event once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            params.oracle_log_sender.send_url_validation_result()
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this link and close the browser."""
        params.oracle_log_sender.send_timeout_log(self.link)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Start the browser (retrying until a page loads) and wait on it.

        ``start_browser`` is retried with a fresh proxy until it returns page
        content. The method then waits up to ``TIME_OUT`` for
        ``OTP_FIELD_ID`` to become visible. Any exception raised by that wait
        still propagates to the caller, but the browser is closed first.
        """
        self.logger.info("will start browser")
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            # Fetched after each attempt so a retry uses a different proxy.
            proxy = params.get_proxy(self.proxy_type)
        try:
            otp_input = self.page.locator(OTP_FIELD_ID)
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
            self.logger.info("timeout")
        finally:
            # Close the browser even when the wait raises, so it never leaks.
            self.termine()

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit as ``device`` and load ``self.link``.

        Args:
            proxy: Proxy settings forwarded to the browser launch.
            pwright: Playwright object providing ``webkit`` and ``devices``.
            device: Key into ``pwright.devices`` selecting the emulated device.

        Returns:
            The page HTML, or None when the captcha page was served or any
            error occurred (the browser is closed in both cases).
        """
        # Forget any browser from a previous attempt so a failed launch does
        # not try to close a stale instance.
        self.browser = None
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            self.logger.info("模拟设备: " + device)
            simulated_mobile = pwright.devices[device]
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.page = context.new_page()
            # hide webdriver information
            self.page.add_init_script("""() => {
                Object.defineProperty(navigator,'webdriver',{get: () => undefined});
                Object.defineProperty(navigator, 'platform', {
                    get: () => {
                        return "iPhone";
                    }});
                }
            """)
            self.page.on("load", self._on_page_loaded)
            self.page.goto(self.link, timeout=PAGE_TIMEOUT)
            content = self.page.content()
            captcha_url = "geo.captcha-delivery.com/captcha"
            if captcha_url in content:
                self._close_browser()
                return None
            return content
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            # print_exc() reads the active exception itself; passing the
            # sys.exc_info() triple would bind it to limit/file/chain and
            # raise inside this handler.
            traceback.print_exc()
            self.logger.exception(error)
            self._close_browser()
            return None

    def start_page(self, proxy, headless=False):
        """Entry point: set the headless flag and run the validation.

        NOTE(review): ``headless`` replaces the value given to the
        constructor, even when left at its default -- confirm this is wanted.
        """
        self.headless = headless
        self._run(proxy)

    def _on_page_loaded(self):
        """Inspect the freshly loaded page and publish the matching result."""
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message or SORRY_SENTENCE_FR in message:
                # The confirmation and the "sorry" sentence are both stored
                # as SUCCESS.
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message:
                # The request had already been validated earlier.
                self.publish_message_to_queue(PublishType.DUPLICATED)
        except Exception as error:
            # Best effort inside an event callback: log with traceback.
            self.logger.exception(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook was called."""
        self.logger.info("on_document_loaded called")

    def _handle_errors(self, erro_content: str):
        """Placeholder; currently does nothing."""
        pass

    def termine(self):
        """Close the browser after a one second pause."""
        time.sleep(1)
        self._close_browser()

    def publish_message_to_queue(self, status: PublishType):
        """Store the validation state for the current URL and shut down.

        Args:
            status: SUCCESS is stored with the default state; any other
                value is stored under its enum name.
        """
        if status is PublishType.SUCCESS:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url)
        else:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name)
        self.on_success()
        time.sleep(2)
        self._close_browser()

    def _close_browser(self):
        """Close the browser if one was launched; shared by all shutdown paths."""
        browser = getattr(self, "browser", None)
        if browser is None:
            return
        self.logger.info("will close browser")
        browser.close()
import logging
import random
import traceback
from typing import Union
import sys
import time
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ReserveResultPojo import PublishType
from src.proxy.proxy_type import ProxyType
from src.workers.TlsPlaywright import TlsPlaywright
# CSS selector of the SMS code input.
OTP_FIELD_ID = "#sms_code"

# 10 minutes, expressed in milliseconds.
TIME_OUT = 1000 * 60 * 10
# Timeout handed to the browser launch and page navigation calls
# (presumably milliseconds as well -- confirm against the Playwright API).
PAGE_TIMEOUT = 40_000

# Sentences looked for in the loaded page to classify the outcome.
CONFIRMED_MESSAGE_FR = (
    "Votre demande de rendez-vous Maroquinerie a bien été enregistrée "
    "et nous vous en remercions."
)
SORRY_SENTENCE_FR = (
    "nous sommes sincèrement désolés de n'avoir pu vous satisfaire "
    "cette fois-ci"
)
DOUBLE_REQUEST_ERROR_MESSAGE_FR = (
    "Une demande avec les données saisies a déjà été validée "
    "aujourdhui."
)
class LinkValidator:
    """Open a validation link in a simulated mobile WebKit browser.

    The link is loaded through a proxy. Every time a page finishes loading,
    its HTML is searched for the known French result sentences and the
    matching state is stored through ``MONGO_STORE_MANAGER``.
    """

    # Playwright holder shared by all instances (created at class definition).
    tls = TlsPlaywright()

    def __init__(self, link: str, proxy_type=ProxyType.RESIDENTIAL, headless=False):
        """Remember the link to validate and how the browser is started.

        Args:
            link: URL opened by ``start_browser``.
            proxy_type: Passed to ``params.get_proxy`` when a retry needs a
                fresh proxy.
            headless: Browser launch flag. Note that ``start_page``
                overwrites it with its own ``headless`` argument.
        """
        self.is_finished = False
        self.link = link
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.logger = logging.getLogger("LinkValidator")
        # Defined up front so shutdown paths can run before any launch.
        self.browser = None
        self.page = None

    def on_success(self):
        """Mark the validation as finished and send the result event once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            params.oracle_log_sender.send_url_validation_result()
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this link and close the browser."""
        params.oracle_log_sender.send_timeout_log(self.link)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Start the browser (retrying until a page loads) and wait on it.

        ``start_browser`` is retried with a fresh proxy until it returns page
        content. The method then waits up to ``TIME_OUT`` for
        ``OTP_FIELD_ID`` to become visible. Any exception raised by that wait
        still propagates to the caller, but the browser is closed first.
        """
        self.logger.info("will start browser")
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            # Fetched after each attempt so a retry uses a different proxy.
            proxy = params.get_proxy(self.proxy_type)
        try:
            otp_input = self.page.locator(OTP_FIELD_ID)
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
            self.logger.info("timeout")
        finally:
            # Close the browser even when the wait raises, so it never leaks.
            self.termine()

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit as ``device`` and load ``self.link``.

        Args:
            proxy: Proxy settings forwarded to the browser launch.
            pwright: Playwright object providing ``webkit`` and ``devices``.
            device: Key into ``pwright.devices`` selecting the emulated device.

        Returns:
            The page HTML, or None when the captcha page was served or any
            error occurred (the browser is closed in both cases).
        """
        # Forget any browser from a previous attempt so a failed launch does
        # not try to close a stale instance.
        self.browser = None
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            self.logger.info("模拟设备: " + device)
            simulated_mobile = pwright.devices[device]
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.page = context.new_page()
            # hide webdriver information
            self.page.add_init_script("""() => {
                Object.defineProperty(navigator,'webdriver',{get: () => undefined});
                Object.defineProperty(navigator, 'platform', {
                    get: () => {
                        return "iPhone";
                    }});
                }
            """)
            self.page.on("load", self._on_page_loaded)
            self.page.goto(self.link, timeout=PAGE_TIMEOUT)
            content = self.page.content()
            captcha_url = "geo.captcha-delivery.com/captcha"
            if captcha_url in content:
                self._close_browser()
                return None
            return content
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            # print_exc() reads the active exception itself; passing the
            # sys.exc_info() triple would bind it to limit/file/chain and
            # raise inside this handler.
            traceback.print_exc()
            self.logger.exception(error)
            self._close_browser()
            return None

    def start_page(self, proxy, headless=False):
        """Entry point: set the headless flag and run the validation.

        NOTE(review): ``headless`` replaces the value given to the
        constructor, even when left at its default -- confirm this is wanted.
        """
        self.headless = headless
        self._run(proxy)

    def _on_page_loaded(self):
        """Inspect the freshly loaded page and publish the matching result."""
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message or SORRY_SENTENCE_FR in message:
                # The confirmation and the "sorry" sentence are both stored
                # as SUCCESS.
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message:
                # The request had already been validated earlier.
                self.publish_message_to_queue(PublishType.DUPLICATED)
        except Exception as error:
            # Best effort inside an event callback: log with traceback.
            self.logger.exception(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook was called."""
        self.logger.info("on_document_loaded called")

    def _handle_errors(self, erro_content: str):
        """Placeholder; currently does nothing."""
        pass

    def termine(self):
        """Close the browser after a one second pause."""
        time.sleep(1)
        self._close_browser()

    def publish_message_to_queue(self, status: PublishType):
        """Store the validation state for the current URL and shut down.

        Args:
            status: SUCCESS is stored with the default state; any other
                value is stored under its enum name.
        """
        if status is PublishType.SUCCESS:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url)
        else:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name)
        self.on_success()
        time.sleep(2)
        self._close_browser()

    def _close_browser(self):
        """Close the browser if one was launched; shared by all shutdown paths."""
        browser = getattr(self, "browser", None)
        if browser is None:
            return
        self.logger.info("will close browser")
        browser.close()