webkit is blocked

This commit is contained in:
2022-09-02 14:18:06 +02:00
parent 7bcae96a8f
commit ba99e54ea7
4 changed files with 121 additions and 56 deletions
+1
View File
@@ -3,6 +3,7 @@ firebase_admin==5.2.0
pandas~=1.3.5
playwright==1.25.0
dataclasses~=0.6
SpeechRecognition==3.8.1
pymongo==4.1.1
anticaptchaofficial==1.0.43
oci~=2.54.1
+1 -1
View File
@@ -163,7 +163,7 @@ def read_mails():
if need_to_valid_url(url, successful_items):
url_validator = LinkValidator(url)
print("need to validate url: " + url)
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True)
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), False)
else:
print("do not need to click url --> {}".format(mail.mail_address))
+75
View File
@@ -0,0 +1,75 @@
import re
import requests
import speech_recognition as sr
import wget
# Pattern for the absolute URL of the captcha's audio clip inside the iframe
# HTML. It is a raw string so it carries no invalid escape sequences, and the
# dot before "wav" is escaped so only a literal ".wav" extension matches
# (the unescaped dot used to accept any character, e.g. ".../subwav").
WAV_FILE_REGEX = r"https:[/a-z0-9.-]+\.wav"

# Spoken word -> digit. 'to' is listed next to 'two' because both words map
# to 2 in the transcript handling below.
number_text_dict = {
    'zero': 0,
    'one': 1,
    'to': 2,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9,
}
class GeoCaptchaSolver:
    """Solve the audio captcha shown inside the first iframe of a page.

    The solver fetches the iframe's HTML, extracts the URL of the ``.wav``
    clip, saves the clip to the working directory, transcribes it with
    Sphinx, maps the last six spoken words to digits and writes those digits
    into the captcha inputs through JavaScript.
    """

    # Number of digits that must be recognised before anything is typed.
    DIGIT_COUNT = 6

    def __init__(self, page):
        """Remember the page object that hosts the captcha iframe.

        :param page: object exposing ``query_selector``; the element it
            returns must expose ``content_frame()``.
        """
        self.page = page
        # Filled in by solve(); defined here so the attribute always exists.
        self.iframe = None

    def solve(self):
        """Locate the captcha, transcribe its audio clip and type the digits.

        Every failure (no iframe, no clip URL in the HTML, audio that Sphinx
        cannot decode, a transcript word that is not a digit) is reported
        with ``print`` and ends the attempt. Nothing is returned.
        """
        print("solve() called.")
        print("try to find iframe")
        iframe_element = self.page.query_selector('iframe')
        iframe = None
        if iframe_element is not None:
            iframe = iframe_element.content_frame()
        if iframe is None:
            # Previously this surfaced as an AttributeError on None.
            print("iframe not found")
            return
        self.iframe = iframe
        print(type(iframe))
        print("url is " + iframe.url)

        body = requests.get(iframe.url).text
        match = re.search(WAV_FILE_REGEX, body)
        if not match:
            print("wav file not found")
            return
        wav_url = match.group(0)
        print("wav file " + wav_url)

        text = self._transcribe(self._download_wav(wav_url))
        if text is None:
            return

        # Only the trailing words of the transcript are treated as digits.
        number_list = text.split(" ")[-self.DIGIT_COUNT:]
        print(number_list)
        number_to_type = self._words_to_digits(number_list)
        if number_to_type is None:
            print("Sphinx thinks you said " + text)
            return
        print(number_to_type)
        if len(number_to_type) == self.DIGIT_COUNT:
            self.input_numbers(number_to_type)
        print("Sphinx thinks you said " + text)

    def _download_wav(self, wav_url):
        """Save the clip at *wav_url* locally and return the file name.

        The file is named after the last path segment of the URL. The clip is
        fetched once and written through a context manager so the handle is
        closed; the earlier code fetched it twice and left the file open.
        """
        file_name = wav_url.split("/")[-1]
        response = requests.get(wav_url)
        with open(file_name, "wb") as wav_file:
            wav_file.write(response.content)
        return file_name

    def _transcribe(self, file_name):
        """Return the Sphinx transcript of *file_name*, or None on failure."""
        recognizer = sr.Recognizer()
        with sr.AudioFile(file_name) as source:
            audio = recognizer.record(source)  # read the entire audio file
        try:
            return recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))
        return None

    @staticmethod
    def _words_to_digits(words):
        """Translate spoken words to digits via ``number_text_dict``.

        Returns the list of digits, or None (after printing the offending
        word) when a word has no entry, instead of raising ``KeyError``.
        """
        digits = []
        for word in words:
            if word not in number_text_dict:
                print("unknown number word: " + word)
                return None
            digits.append(number_text_dict[word])
        return digits

    def input_numbers(self, number_to_type):
        """Type the recognised digits into the captcha.

        :param number_to_type: sequence of digits, one per input field.
        """
        # use javascript to input
        self.input_by_js(number_to_type)

    def input_by_js(self, number_to_type):
        """Write each digit into the matching ``audio-captcha-inputs`` element.

        ``solve()`` must have stored the iframe first, because the script is
        run through ``self.iframe.page``.

        :param number_to_type: sequence of digits, one per input field.
        """
        # The collection returned by getElementsByClassName is zero-based.
        # The index used to be incremented before use, so field 0 was never
        # filled and the last digit was written one position too far.
        # NOTE(review): assumes there is exactly one matching element per
        # digit -- confirm against the captcha markup.
        # NOTE(review): the script runs in the frame's owning page rather
        # than in the iframe itself -- confirm the inputs are reachable there.
        for index, number in enumerate(number_to_type):
            (self.iframe.page).evaluate("""(info) =>{
            let input_fileds = document.getElementsByClassName("audio-captcha-inputs");
            console.log(input_fileds)
            input_fileds[info.index].value = info.number;}
            """, {'index': index, 'number': number})
+44 -55
View File
@@ -8,8 +8,6 @@ import time
import traceback
from typing import Union
from anticaptchaofficial.antigatetask import antigateTask
from playwright._impl._api_structures import SetCookieParam
from playwright_stealth import stealth_sync
from src import params, definitions
@@ -18,6 +16,7 @@ from src.pojo.ModeEnum import ModeEnum
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
from src.pojo.contact_pojo import ContactPojo
from src.proxy.proxy_type import ProxyType
from src.workers.GeoCaptchSolver import GeoCaptchaSolver
from src.workers.SolveCaptch import SolveCaptcha
from src.workers.TlsPlaywright import TlsPlaywright
@@ -30,6 +29,7 @@ REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0
otp_value = None
OTP_FIELD_ID = "#sms_code"
MESSAGE_FIELD_CLASS = ".message"
audio_button = "captcha__audio__button"
BLANK_URL = "about:blank"
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
@@ -124,40 +124,17 @@ class CommandorPage:
def start_browser(self, proxy, pwright, device) -> Union[str, None]:
try:
self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT)
# self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
# userAgent = random.choice(params.firefox_user_agent_list)
simulated_mobile = pwright.devices[device]
userAgent = simulated_mobile['user_agent']
print("user_agent is " + userAgent)
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
context = self.browser.new_context(**simulated_mobile)
self.current_context = context
self.create_and_config_page(context)
return self.page.content()
# self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
# self.logger.info("模拟设备: " + device)
# simulated_mobile = pwright.devices[device]
# context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
# self.page = context.new_page()
# # hide webdriver information
# self.page.add_init_script("""() => {
# Object.defineProperty(navigator,'webdriver',{get: () => undefined});
# Object.defineProperty(navigator, 'platform', {
# get: () => {
# return "iPhone";
# }});
# }
# """)
# self.page.on("load", self._on_page_loaded)
# self.page.on("response", self.handle_response)
# self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT)
# captcha_url = "geo.captcha-delivery.com/captcha"
# if captcha_url in self.page.content():
# self.logger.info("will close browser")
# self.browser.close()
# return None
# else:
# return self.page.content()
except Exception as error:
params.oracle_log_sender.send_error(str(error))
traceback.print_exc(*sys.exc_info())
@@ -194,31 +171,36 @@ class CommandorPage:
def solve_datadome_captcha(self):
print("solve_datadome_captcha")
solver = antigateTask()
solver.set_verbose(1)
solver.set_key("ede6a69396fc961af351e7c8ffda9059")
solver.set_website_url(RDV_URL)
solver.set_template_name("Anti-bot screen bypass")
solver.set_variables({
"css_selector": ".captcha__human__container"
})
result = solver.solve_and_return_solution()
if result != 0:
cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[
"fingerprint"], result["url"], result["domain"]
print("cookies: ", cookies)
print("localStorage: ", localStorage)
print("fingerprint: ", fingerprint)
print("url: " + url)
print("domain: " + domain)
# add cookies to playwright
cookie_list = []
cookie_list.append(SetCookieParam(name='datadome', value=cookies['datadome'], url=url))
self.page.context.add_cookies(cookie_list)
self.config_page_with_fingerprint(fingerprint)
self.page.reload()
else:
print("task finished with error " + solver.error_code)
solver = GeoCaptchaSolver(self.page)
# time.sleep(2)
# self._click_audio_btn()
# time.sleep(1)
solver.solve()
# solver = antigateTask()
# solver.set_verbose(1)
# solver.set_key("ede6a69396fc961af351e7c8ffda9059")
# solver.set_website_url(RDV_URL)
# solver.set_template_name("Anti-bot screen bypass")
# solver.set_variables({
# "css_selector": ".captcha__human__container"
# })
# result = solver.solve_and_return_solution()
# if result != 0:
# cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[
# "fingerprint"], result["url"], result["domain"]
# print("cookies: ", cookies)
# print("localStorage: ", localStorage)
# print("fingerprint: ", fingerprint)
# print("url: " + url)
# print("domain: " + domain)
# # add cookies to playwright
# cookie_list = []
# cookie_list.append(SetCookieParam(name='datadome', value=cookies['datadome'], url=url))
# self.page.context.add_cookies(cookie_list)
# self.config_page_with_fingerprint(fingerprint)
# self.page.reload()
# else:
# print("task finished with error " + solver.error_code)
def _on_page_loaded(self):
# time.sleep(40000)
@@ -236,8 +218,8 @@ class CommandorPage:
self.get_errors()
except Exception as error:
self.logger.error(error)
# else:
# self.solve_datadome_captcha()
else:
self.solve_datadome_captcha()
def on_document_loaded(self):
self.logger.info("on_document_loaded called")
@@ -280,6 +262,13 @@ class CommandorPage:
except Exception as error:
self.logger.error(error)
def _click_audio_btn(self):
    """Click the captcha's audio button after a pause.

    Sleeps for the duration returned by ``get_random_wait_time()``, then
    runs a script in ``self.page`` that looks up the element with id
    ``captcha__audio__button`` and calls ``click()`` on it.
    """
    time.sleep(get_random_wait_time())
    # NOTE(review): the id is hard-coded in the script although this module
    # also defines ``audio_button`` with the same value.
    # NOTE(review): the script does not check that the element exists;
    # presumably a missing button makes the click throw -- confirm.
    self.page.evaluate("""{
let surname = document.getElementById('captcha__audio__button');
surname.click();}
""")
def get_errors(self):
# send error result
if self.page.url != BLANK_URL: