webkit is blocked
This commit is contained in:
@@ -3,6 +3,7 @@ firebase_admin==5.2.0
|
|||||||
pandas~=1.3.5
|
pandas~=1.3.5
|
||||||
playwright==1.25.0
|
playwright==1.25.0
|
||||||
dataclasses~=0.6
|
dataclasses~=0.6
|
||||||
|
SpeechRecognition==3.8.1
|
||||||
pymongo==4.1.1
|
pymongo==4.1.1
|
||||||
anticaptchaofficial==1.0.43
|
anticaptchaofficial==1.0.43
|
||||||
oci~=2.54.1
|
oci~=2.54.1
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ def read_mails():
|
|||||||
if need_to_valid_url(url, successful_items):
|
if need_to_valid_url(url, successful_items):
|
||||||
url_validator = LinkValidator(url)
|
url_validator = LinkValidator(url)
|
||||||
print("need to validate url: " + url)
|
print("need to validate url: " + url)
|
||||||
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True)
|
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), False)
|
||||||
else:
|
else:
|
||||||
print("do not need to click url --> {}".format(mail.mail_address))
|
print("do not need to click url --> {}".format(mail.mail_address))
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,75 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import speech_recognition as sr
|
||||||
|
import wget
|
||||||
|
|
||||||
|
# Pattern for the absolute URL of a ``.wav`` file inside the captcha page's HTML.
# Written as a raw string so no backslash is interpreted by Python first, and
# with the dot before "wav" escaped so that only a literal ".wav" extension
# matches (an unescaped "." would accept any character in that position).
WAV_FILE_REGEX = r"https:[/a-z0-9.-]+\.wav"

# Spoken word -> digit lookup used on the speech-recognition transcript.
# 'to' maps to 2 as well — presumably because the recogniser can emit it for
# the spoken word "two"; confirm against real transcripts.
number_text_dict = {
    'zero': 0,
    'one': 1,
    'to': 2,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9,
}
|
||||||
|
|
||||||
|
|
||||||
|
class GeoCaptchaSolver:
    """Solve an audio captcha that is embedded in an iframe of a Playwright page.

    The solver fetches the iframe's HTML, extracts the ``.wav`` challenge URL
    with ``WAV_FILE_REGEX``, transcribes the audio through SpeechRecognition's
    Sphinx backend, converts the trailing number words to digits with
    ``number_text_dict`` and writes them into the captcha's input fields.
    """

    # Number of digits that must be recognised before anything is typed.
    CODE_LENGTH = 6

    def __init__(self, page):
        """Store the Playwright ``page`` that shows the captcha."""
        self.page = page
        # Set by solve() once the captcha iframe has been located.
        self.iframe = None

    def solve(self):
        """Find the captcha iframe, transcribe its audio and fill in the digits.

        Progress and every failure mode (no iframe, no wav URL, audio not
        understood, unknown number word) are reported with ``print``; the
        method returns ``None`` in all cases.
        """
        print("solve() called.")
        print("try to find iframe")
        iframe_element = self.page.query_selector('iframe')
        iframe = iframe_element.content_frame() if iframe_element is not None else None
        if iframe is None:
            # query_selector() yields None when nothing matches; bail out
            # instead of failing with AttributeError.
            print("iframe not found")
            return
        self.iframe = iframe
        print("url is " + iframe.url)

        captcha_html = requests.get(iframe.url).text
        match = re.search(WAV_FILE_REGEX, captcha_html)
        if not match:
            print("wav file not found")
            return
        wav_url = match.group(0)
        print("wav file " + wav_url)

        file_name = self._download_wav(wav_url)
        text = self._transcribe(file_name)
        if text is None:
            return
        print("Sphinx thinks you said " + text)

        number_to_type = self._words_to_digits(text)
        print(number_to_type)
        if number_to_type is not None and len(number_to_type) == self.CODE_LENGTH:
            self.input_numbers(number_to_type)

    @staticmethod
    def _download_wav(wav_url):
        """Save ``wav_url`` into the working directory and return the file name."""
        file_name = wav_url.split("/")[-1]
        response = requests.get(wav_url)
        # Single download, and the handle is closed before the recogniser
        # opens the file.
        with open(file_name, "wb") as wav_file:
            wav_file.write(response.content)
        return file_name

    @staticmethod
    def _transcribe(file_name):
        """Return the Sphinx transcript of ``file_name``, or ``None`` on failure."""
        recognizer = sr.Recognizer()
        with sr.AudioFile(file_name) as source:
            audio = recognizer.record(source)  # read the entire audio file
        try:
            return recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))
        return None

    @staticmethod
    def _words_to_digits(text):
        """Convert the last ``CODE_LENGTH`` words of ``text`` to a list of digits.

        Returns ``None`` when one of those words is not a key of
        ``number_text_dict`` (a mis-heard word must not crash the solver).
        """
        words = text.split()[-GeoCaptchaSolver.CODE_LENGTH:]
        print(words)
        try:
            return [number_text_dict[word] for word in words]
        except KeyError as unknown_word:
            print("unrecognised number word: {}".format(unknown_word))
            return None

    def input_numbers(self, number_to_type):
        """Type the recognised digits into the captcha (currently via JavaScript)."""
        self.input_by_js(number_to_type)

    def input_by_js(self, number_to_type):
        """Write each digit into the matching ``audio-captcha-inputs`` element.

        ``number_to_type`` is an iterable of digits; the digit at position
        ``i`` is assigned to the ``i``-th element (0-based) carrying the
        ``audio-captcha-inputs`` class.
        """
        for index, number in enumerate(number_to_type):
            # Evaluate inside the captcha frame: solve() finds the audio
            # challenge in the iframe's own document, so the top-level page's
            # ``document`` is not where the captcha markup lives.
            # NOTE(review): assumes the input fields are in that same frame
            # and that the class is on the inputs themselves — confirm
            # against the live captcha markup.
            self.iframe.evaluate("""(info) => {
                const inputFields = document.getElementsByClassName("audio-captcha-inputs");
                inputFields[info.index].value = info.number;
            }""", {'index': index, 'number': number})
||||||
@@ -8,8 +8,6 @@ import time
|
|||||||
import traceback
|
import traceback
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from anticaptchaofficial.antigatetask import antigateTask
|
|
||||||
from playwright._impl._api_structures import SetCookieParam
|
|
||||||
from playwright_stealth import stealth_sync
|
from playwright_stealth import stealth_sync
|
||||||
|
|
||||||
from src import params, definitions
|
from src import params, definitions
|
||||||
@@ -18,6 +16,7 @@ from src.pojo.ModeEnum import ModeEnum
|
|||||||
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
|
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
|
||||||
from src.pojo.contact_pojo import ContactPojo
|
from src.pojo.contact_pojo import ContactPojo
|
||||||
from src.proxy.proxy_type import ProxyType
|
from src.proxy.proxy_type import ProxyType
|
||||||
|
from src.workers.GeoCaptchSolver import GeoCaptchaSolver
|
||||||
from src.workers.SolveCaptch import SolveCaptcha
|
from src.workers.SolveCaptch import SolveCaptcha
|
||||||
from src.workers.TlsPlaywright import TlsPlaywright
|
from src.workers.TlsPlaywright import TlsPlaywright
|
||||||
|
|
||||||
@@ -30,6 +29,7 @@ REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0
|
|||||||
otp_value = None
|
otp_value = None
|
||||||
OTP_FIELD_ID = "#sms_code"
|
OTP_FIELD_ID = "#sms_code"
|
||||||
MESSAGE_FIELD_CLASS = ".message"
|
MESSAGE_FIELD_CLASS = ".message"
|
||||||
|
audio_button = "captcha__audio__button"
|
||||||
BLANK_URL = "about:blank"
|
BLANK_URL = "about:blank"
|
||||||
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
|
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
|
||||||
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
|
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
|
||||||
@@ -124,40 +124,17 @@ class CommandorPage:
|
|||||||
|
|
||||||
def start_browser(self, proxy, pwright, device) -> Union[str, None]:
|
def start_browser(self, proxy, pwright, device) -> Union[str, None]:
|
||||||
try:
|
try:
|
||||||
self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
|
self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
|
||||||
|
# self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
|
||||||
# userAgent = random.choice(params.firefox_user_agent_list)
|
# userAgent = random.choice(params.firefox_user_agent_list)
|
||||||
simulated_mobile = pwright.devices[device]
|
simulated_mobile = pwright.devices[device]
|
||||||
userAgent = simulated_mobile['user_agent']
|
userAgent = simulated_mobile['user_agent']
|
||||||
print("user_agent is " + userAgent)
|
print("user_agent is " + userAgent)
|
||||||
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
|
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
|
||||||
context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
|
context = self.browser.new_context(**simulated_mobile)
|
||||||
self.current_context = context
|
self.current_context = context
|
||||||
self.create_and_config_page(context)
|
self.create_and_config_page(context)
|
||||||
return self.page.content()
|
return self.page.content()
|
||||||
# self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
|
|
||||||
# self.logger.info("模拟设备: " + device)
|
|
||||||
# simulated_mobile = pwright.devices[device]
|
|
||||||
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
|
|
||||||
# self.page = context.new_page()
|
|
||||||
# # hide webdriver information
|
|
||||||
# self.page.add_init_script("""() => {
|
|
||||||
# Object.defineProperty(navigator,'webdriver',{get: () => undefined});
|
|
||||||
# Object.defineProperty(navigator, 'platform', {
|
|
||||||
# get: () => {
|
|
||||||
# return "iPhone";
|
|
||||||
# }});
|
|
||||||
# }
|
|
||||||
# """)
|
|
||||||
# self.page.on("load", self._on_page_loaded)
|
|
||||||
# self.page.on("response", self.handle_response)
|
|
||||||
# self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT)
|
|
||||||
# captcha_url = "geo.captcha-delivery.com/captcha"
|
|
||||||
# if captcha_url in self.page.content():
|
|
||||||
# self.logger.info("will close browser")
|
|
||||||
# self.browser.close()
|
|
||||||
# return None
|
|
||||||
# else:
|
|
||||||
# return self.page.content()
|
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
params.oracle_log_sender.send_error(str(error))
|
params.oracle_log_sender.send_error(str(error))
|
||||||
traceback.print_exc(*sys.exc_info())
|
traceback.print_exc(*sys.exc_info())
|
||||||
@@ -194,31 +171,36 @@ class CommandorPage:
|
|||||||
|
|
||||||
def solve_datadome_captcha(self):
|
def solve_datadome_captcha(self):
|
||||||
print("solve_datadome_captcha")
|
print("solve_datadome_captcha")
|
||||||
solver = antigateTask()
|
solver = GeoCaptchaSolver(self.page)
|
||||||
solver.set_verbose(1)
|
# time.sleep(2)
|
||||||
solver.set_key("ede6a69396fc961af351e7c8ffda9059")
|
# self._click_audio_btn()
|
||||||
solver.set_website_url(RDV_URL)
|
# time.sleep(1)
|
||||||
solver.set_template_name("Anti-bot screen bypass")
|
solver.solve()
|
||||||
solver.set_variables({
|
# solver = antigateTask()
|
||||||
"css_selector": ".captcha__human__container"
|
# solver.set_verbose(1)
|
||||||
})
|
# solver.set_key("ede6a69396fc961af351e7c8ffda9059")
|
||||||
result = solver.solve_and_return_solution()
|
# solver.set_website_url(RDV_URL)
|
||||||
if result != 0:
|
# solver.set_template_name("Anti-bot screen bypass")
|
||||||
cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[
|
# solver.set_variables({
|
||||||
"fingerprint"], result["url"], result["domain"]
|
# "css_selector": ".captcha__human__container"
|
||||||
print("cookies: ", cookies)
|
# })
|
||||||
print("localStorage: ", localStorage)
|
# result = solver.solve_and_return_solution()
|
||||||
print("fingerprint: ", fingerprint)
|
# if result != 0:
|
||||||
print("url: " + url)
|
# cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[
|
||||||
print("domain: " + domain)
|
# "fingerprint"], result["url"], result["domain"]
|
||||||
# add cookies to playwright
|
# print("cookies: ", cookies)
|
||||||
cookie_list = []
|
# print("localStorage: ", localStorage)
|
||||||
cookie_list.append(SetCookieParam(name='datadome', value=cookies['datadome'], url=url))
|
# print("fingerprint: ", fingerprint)
|
||||||
self.page.context.add_cookies(cookie_list)
|
# print("url: " + url)
|
||||||
self.config_page_with_fingerprint(fingerprint)
|
# print("domain: " + domain)
|
||||||
self.page.reload()
|
# # add cookies to playwright
|
||||||
else:
|
# cookie_list = []
|
||||||
print("task finished with error " + solver.error_code)
|
# cookie_list.append(SetCookieParam(name='datadome', value=cookies['datadome'], url=url))
|
||||||
|
# self.page.context.add_cookies(cookie_list)
|
||||||
|
# self.config_page_with_fingerprint(fingerprint)
|
||||||
|
# self.page.reload()
|
||||||
|
# else:
|
||||||
|
# print("task finished with error " + solver.error_code)
|
||||||
|
|
||||||
def _on_page_loaded(self):
|
def _on_page_loaded(self):
|
||||||
# time.sleep(40000)
|
# time.sleep(40000)
|
||||||
@@ -236,8 +218,8 @@ class CommandorPage:
|
|||||||
self.get_errors()
|
self.get_errors()
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
self.logger.error(error)
|
self.logger.error(error)
|
||||||
# else:
|
else:
|
||||||
# self.solve_datadome_captcha()
|
self.solve_datadome_captcha()
|
||||||
|
|
||||||
def on_document_loaded(self):
|
def on_document_loaded(self):
|
||||||
self.logger.info("on_document_loaded called")
|
self.logger.info("on_document_loaded called")
|
||||||
@@ -280,6 +262,13 @@ class CommandorPage:
|
|||||||
except Exception as error:
|
except Exception as error:
|
||||||
self.logger.error(error)
|
self.logger.error(error)
|
||||||
|
|
||||||
|
def _click_audio_btn(self):
    """Click the captcha's audio button on ``self.page``.

    Sleeps for ``get_random_wait_time()`` seconds first, then clicks the
    element whose id is the module-level ``audio_button`` constant. If no
    such element exists the script fails inside the page, and that failure
    propagates out of ``page.evaluate``.
    """
    time.sleep(get_random_wait_time())
    # Hand the id over as an argument so the ``audio_button`` constant stays
    # the single place where the element id is spelled out.
    self.page.evaluate(
        "(buttonId) => { document.getElementById(buttonId).click(); }",
        audio_button,
    )
|
||||||
|
|
||||||
def get_errors(self):
|
def get_errors(self):
|
||||||
# send error result
|
# send error result
|
||||||
if self.page.url != BLANK_URL:
|
if self.page.url != BLANK_URL:
|
||||||
|
|||||||
Reference in New Issue
Block a user