try playwright stealth

This commit is contained in:
Lei PAN
2022-09-01 16:22:39 +02:00
parent b99737e7e4
commit 7bcae96a8f
5 changed files with 1011 additions and 958 deletions
+17 -15
View File
@@ -1,16 +1,18 @@
dataclasses_json==0.5.7 dataclasses_json==0.5.7
firebase_admin==5.2.0 firebase_admin==5.2.0
pandas~=1.3.5 pandas~=1.3.5
playwright==1.23.0 playwright==1.25.0
dataclasses~=0.6 dataclasses~=0.6
pymongo==4.1.1 pymongo==4.1.1
oci~=2.54.1 anticaptchaofficial==1.0.43
XlsxWriter~=3.0.3 oci~=2.54.1
boto3~=1.21.13 XlsxWriter~=3.0.3
openpyxl==3.0.9 playwright-stealth==1.0.5
google-cloud-firestore==2.4.0 boto3~=1.21.13
PySimpleGUI==4.60.1 openpyxl==3.0.9
SQLAlchemy~=1.4.37 google-cloud-firestore==2.4.0
requests~=2.27.1 PySimpleGUI==4.60.1
sqlalchemy_utils~=0.38.2 SQLAlchemy~=1.4.37
requests~=2.27.1
sqlalchemy_utils~=0.38.2
Mako~=1.2.0 Mako~=1.2.0
+173 -173
View File
@@ -1,173 +1,173 @@
import datetime import datetime
import email import email
import imaplib import imaplib
import re import re
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from email.header import decode_header from email.header import decode_header
from email.message import Message from email.message import Message
from builtins import list from builtins import list
from src import params from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.mail.mail_pojo import MailPojo, MailAddress from src.pojo.mail.mail_pojo import MailPojo, MailAddress
from src.proxy.proxy_type import ProxyType from src.proxy.proxy_type import ProxyType
from src.workers.link_validator import LinkValidator from src.workers.link_validator import LinkValidator
AOL_IMAP_SERVER = "imap.aol.com" AOL_IMAP_SERVER = "imap.aol.com"
IMAP_SERVER_163 = "imap.163.com" IMAP_SERVER_163 = "imap.163.com"
VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous' VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous'
VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+"""
HERMES_EMAIL = "no-reply@hermes.com" HERMES_EMAIL = "no-reply@hermes.com"
DOMAIN_163 = "163.com" DOMAIN_163 = "163.com"
date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014 date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014
class MailReader(): class MailReader():
def __init__(self, login, password): def __init__(self, login, password):
self.login = login self.login = login
self.password = password self.password = password
@staticmethod @staticmethod
def show_folders(imap): def show_folders(imap):
for i in imap.list()[1]: for i in imap.list()[1]:
l = i.decode().split(' "/" ') l = i.decode().split(' "/" ')
print(l[0] + " = " + l[1]) print(l[0] + " = " + l[1])
def read_emails(self, mails_messages: list) -> list: def read_emails(self, mails_messages: list) -> list:
# create an IMAP4 class with SSL # create an IMAP4 class with SSL
if DOMAIN_163 in self.login: if DOMAIN_163 in self.login:
imap = imaplib.IMAP4_SSL(IMAP_SERVER_163) imap = imaplib.IMAP4_SSL(IMAP_SERVER_163)
else: else:
imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER) imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER)
# authenticate # authenticate
imap.login(self.login, self.password) imap.login(self.login, self.password)
mail_list = [] mail_list = []
print("read mails from {}".format(self.login)) print("read mails from {}".format(self.login))
# self.show_folders(imap) # self.show_folders(imap)
# total number of emails # total number of emails
# get mails from inbox # get mails from inbox
# (\Archive \HasNoChildren) = "Archive" # (\Archive \HasNoChildren) = "Archive"
# (\Junk \HasNoChildren) = "Bulk" # (\Junk \HasNoChildren) = "Bulk"
# (\Drafts \HasNoChildren) = "Draft" # (\Drafts \HasNoChildren) = "Draft"
# (\HasNoChildren) = "Inbox" # (\HasNoChildren) = "Inbox"
# (\Sent \HasNoChildren) = "Sent" # (\Sent \HasNoChildren) = "Sent"
# (\Trash \HasNoChildren) = "Trash" # (\Trash \HasNoChildren) = "Trash"
mail_list.extend(self._get_messages_from_folder(imap)) mail_list.extend(self._get_messages_from_folder(imap))
# mail_list.extend(self._get_messages_from_folder(imap, folder="Bulk")) # mail_list.extend(self._get_messages_from_folder(imap, folder="Bulk"))
# close the connection and logout # close the connection and logout
imap.close() imap.close()
imap.logout() imap.logout()
mails_messages.extend(mail_list) mails_messages.extend(mail_list)
return mail_list return mail_list
def _get_messages_from_folder(self, imap, folder="INBOX") -> list: def _get_messages_from_folder(self, imap, folder="INBOX") -> list:
imap.select(folder) imap.select(folder)
mail_messages = [] mail_messages = []
typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT, typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT,
datetime.datetime.today().strftime( datetime.datetime.today().strftime(
date_format))) date_format)))
for i in data[0].split(): for i in data[0].split():
# fetch the email message by ID # fetch the email message by ID
res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)")
body = '' body = ''
for response in msg: for response in msg:
if isinstance(response, tuple): if isinstance(response, tuple):
# parse a bytes email into a message object # parse a bytes email into a message object
msg = email.message_from_bytes(response[1]) msg = email.message_from_bytes(response[1])
# decode the email subject # decode the email subject
subject, subject_encoded = decode_header(msg["Subject"])[0] subject, subject_encoded = decode_header(msg["Subject"])[0]
received_date = msg["Date"] received_date = msg["Date"]
if isinstance(subject, bytes): if isinstance(subject, bytes):
# if it's a bytes, decode to str # if it's a bytes, decode to str
subject = subject.decode(subject_encoded) subject = subject.decode(subject_encoded)
# decode email sender # decode email sender
from_address, subject_encoded = decode_header(msg.get("From"))[0] from_address, subject_encoded = decode_header(msg.get("From"))[0]
if isinstance(from_address, bytes): if isinstance(from_address, bytes):
from_address = from_address.decode(subject_encoded) from_address = from_address.decode(subject_encoded)
print("From:", from_address) print("From:", from_address)
print("Subject:", subject) print("Subject:", subject)
# if the email message is multipart # if the email message is multipart
if msg.is_multipart(): if msg.is_multipart():
# iterate over email parts # iterate over email parts
for part in msg.walk(): for part in msg.walk():
try: try:
# get the email body # get the email body
payloads = part.get_payload() payloads = part.get_payload()
if isinstance(payloads, list): if isinstance(payloads, list):
for payload in payloads: for payload in payloads:
if isinstance(payload, Message): if isinstance(payload, Message):
body = body + payload.get_payload(decode=True).decode("iso-8859-1") body = body + payload.get_payload(decode=True).decode("iso-8859-1")
# print(body) # print(body)
except Exception as Error: except Exception as Error:
print(Error) print(Error)
else: else:
body = msg.get_payload(decode=True).decode() body = msg.get_payload(decode=True).decode()
print(body) print(body)
if VALIDATION_URL_SUBJECT in subject: if VALIDATION_URL_SUBJECT in subject:
mail = MailPojo(subject=subject, body=body, from_address=from_address) mail = MailPojo(subject=subject, body=body, from_address=from_address)
mail_messages.append(mail) mail_messages.append(mail)
return mail_messages return mail_messages
def clean(text): def clean(text):
# clean text for creating a folder # clean text for creating a folder
return "".join(c if c.isalnum() else "_" for c in text) return "".join(c if c.isalnum() else "_" for c in text)
def need_to_valid_url(url: str, successful_items) -> bool: def need_to_valid_url(url: str, successful_items) -> bool:
print("url is :" + url) print("url is :" + url)
parts = url.split('/') parts = url.split('/')
id = parts[5] id = parts[5]
if len(id) == 6: if len(id) == 6:
for item in successful_items: for item in successful_items:
# if item.url_validated is not None: # if item.url_validated is not None:
# print("id:{}, status:{} ".format(id, str(item.url_validated))) # print("id:{}, status:{} ".format(id, str(item.url_validated)))
if item.id == id: if item.id == id:
if item.url_validated is not None: if item.url_validated is not None:
return not item.url_validated return not item.url_validated
else: else:
# if url_validated is None # if url_validated is None
return True return True
# return True by default # return True by default
return False return False
else: else:
print("id not valid:{}".format(id)) print("id not valid:{}".format(id))
return False return False
def read_mails(): def read_mails():
# get email address # get email address
mail_list = MONGO_STORE_MANAGER.get_destination_emails() mail_list = MONGO_STORE_MANAGER.get_destination_emails()
# # mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq") # # mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq")
# # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb") # # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb")
# # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm") # # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm")
# # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce") # # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce")
# # mail_address4 = MailAddress(mail="hongjiang176@aol.com", password="ftzpscgzvwneelmn") # # mail_address4 = MailAddress(mail="hongjiang176@aol.com", password="ftzpscgzvwneelmn")
# mail_address4 = MailAddress(mail="ribka_puchkova@aol.com", password="aqvsozoyifbixtdc") # mail_address4 = MailAddress(mail="ribka_puchkova@aol.com", password="aqvsozoyifbixtdc")
# mail_list = [mail_address3, mail_address2, mail_address1, mail_address4] # mail_list = [mail_address3, mail_address2, mail_address1, mail_address4]
# mail_list = [mail_address4] # mail_list = [mail_address4]
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
mails_messages = [] mails_messages = []
with ThreadPoolExecutor(max_workers=20) as executor: with ThreadPoolExecutor(max_workers=20) as executor:
for mail in mail_list: for mail in mail_list:
mail_reader = MailReader(mail.mail, mail.password) mail_reader = MailReader(mail.mail, mail.password)
executor.submit(mail_reader.read_emails, mails_messages) executor.submit(mail_reader.read_emails, mails_messages)
with ThreadPoolExecutor(max_workers=20) as executor: with ThreadPoolExecutor(max_workers=20) as executor:
for mail in mails_messages: for mail in mails_messages:
match = re.search(VALIDATION_URL_REGEX, mail.body) match = re.search(VALIDATION_URL_REGEX, mail.body)
if match: if match:
url = match.group(0) url = match.group(0)
if need_to_valid_url(url, successful_items): if need_to_valid_url(url, successful_items):
url_validator = LinkValidator(url) url_validator = LinkValidator(url)
print("need to validate url: " + url) print("need to validate url: " + url)
executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True) executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True)
else: else:
print("do not need to click url --> {}".format(mail.mail_address)) print("do not need to click url --> {}".format(mail.mail_address))
# check whether the url has already been clicked # check whether the url has already been clicked
if __name__ == '__main__': if __name__ == '__main__':
read_mails() read_mails()
+225 -225
View File
@@ -1,225 +1,225 @@
import json import json
import random import random
import string import string
import pandas as pandas import pandas as pandas
import xlsxwriter import xlsxwriter
from src.config import CONTACT_LIST_FILE from src.config import CONTACT_LIST_FILE
from src.db.mongo_manager import MONGO_STORE_MANAGER from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.contact_pojo import ContactPojo from src.pojo.contact_pojo import ContactPojo
from src.pojo.mail.mail_pojo import MailAddress from src.pojo.mail.mail_pojo import MailAddress
from src.utils.generate_random_passport_id import get_random_passport_id_number from src.utils.generate_random_passport_id import get_random_passport_id_number
phone_number_prefix = ['6'] phone_number_prefix = ['6']
class ExcelHelper: class ExcelHelper:
def __init__(self): def __init__(self):
self._df = pandas.Series() self._df = pandas.Series()
def write_to_exel(self, file_name, data_list: list): def write_to_exel(self, file_name, data_list: list):
new_df = pandas.Series(data_list) new_df = pandas.Series(data_list)
self._df = pandas.concat([self._df, new_df]) self._df = pandas.concat([self._df, new_df])
self._df.to_excel(file_name) self._df.to_excel(file_name)
def read_user_agens(self) -> list: def read_user_agens(self) -> list:
user_agent_in_json = pandas.read_excel( user_agent_in_json = pandas.read_excel(
"/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json( "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json(
orient='records') orient='records')
user_agent_dict_list = json.loads(user_agent_in_json) user_agent_dict_list = json.loads(user_agent_in_json)
user_agent_list = [] user_agent_list = []
for user_agent_dict in user_agent_dict_list: for user_agent_dict in user_agent_dict_list:
user_agent_str = user_agent_dict['user_agent'] user_agent_str = user_agent_dict['user_agent']
if 'Mozilla' in user_agent_str: if 'Mozilla' in user_agent_str:
if 'Android 5.1' in user_agent_str: if 'Android 5.1' in user_agent_str:
if 'Mac OS' not in user_agent_str: if 'Mac OS' not in user_agent_str:
user_agent_list.append(user_agent_dict['user_agent']) user_agent_list.append(user_agent_dict['user_agent'])
print(user_agent_list) print(user_agent_list)
def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list: def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list:
contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records')
contact_dict_list = json.loads(contact_list_in_json) contact_dict_list = json.loads(contact_list_in_json)
contact_list = [] contact_list = []
for contact_dict in contact_dict_list: for contact_dict in contact_dict_list:
if contact_dict['name']: if contact_dict['name']:
raw_name = contact_dict['name'].strip() raw_name = contact_dict['name'].strip()
name = raw_name.split(' ') name = raw_name.split(' ')
last_name = name[0] last_name = name[0]
if len(name) == 2: if len(name) == 2:
first_name = name[-1] first_name = name[-1]
else: else:
first_name = ''.join(name[1:len(name)]) first_name = ''.join(name[1:len(name)])
contact = ContactPojo(phone_number=contact_dict['phone'], contact = ContactPojo(phone_number=contact_dict['phone'],
last_name=last_name, last_name=last_name,
first_name=first_name, first_name=first_name,
passport_number=contact_dict['passport'], passport_number=contact_dict['passport'],
mail=contact_dict['email']) mail=contact_dict['email'])
contact_list.append(contact) contact_list.append(contact)
return contact_list return contact_list
def read_mails_and_pwd(self, def read_mails_and_pwd(self,
file_name='/Users/lpan/Desktop/163.xlsx'): file_name='/Users/lpan/Desktop/163.xlsx'):
contact_list = [] contact_list = []
mail_list_in_json = pandas.read_excel(file_name).to_json(orient='records') mail_list_in_json = pandas.read_excel(file_name).to_json(orient='records')
contact_dict_list = json.loads(mail_list_in_json) contact_dict_list = json.loads(mail_list_in_json)
for contact_dict in contact_dict_list: for contact_dict in contact_dict_list:
if contact_dict['mail']: if contact_dict['mail']:
mail = contact_dict['mail'].strip() mail = contact_dict['mail'].strip()
pwd = contact_dict['password'] pwd = contact_dict['password']
contact = MailAddress(mail, pwd) contact = MailAddress(mail, pwd)
contact_list.append(contact) contact_list.append(contact)
return contact_list return contact_list
def read_names(self, file_name=CONTACT_LIST_FILE) -> list: def read_names(self, file_name=CONTACT_LIST_FILE) -> list:
contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records')
contact_dict_list = json.loads(contact_list_in_json) contact_dict_list = json.loads(contact_list_in_json)
contact_list = [] contact_list = []
count = 2 count = 2
for contact_dict in contact_dict_list: for contact_dict in contact_dict_list:
if contact_dict['name']: if contact_dict['name']:
raw_name = contact_dict['name'].strip() raw_name = contact_dict['name'].strip()
name = raw_name.split(' ') name = raw_name.split(' ')
if len(name) == 1: if len(name) == 1:
name = raw_name.split('\xa0') name = raw_name.split('\xa0')
if len(name) == 1: if len(name) == 1:
print("error in " + str(name)) print("error in " + str(name))
last_name = name[0] last_name = name[0]
if len(name) == 2: if len(name) == 2:
first_name = name[-1] first_name = name[-1]
else: else:
first_name = ''.join(name[1:len(name)]) first_name = ''.join(name[1:len(name)])
contact = ContactPojo(phone_number="", contact = ContactPojo(phone_number="",
last_name=last_name, last_name=last_name,
first_name=first_name, first_name=first_name,
passport_number="", passport_number="",
mail="") mail="")
if len(first_name) == 0: if len(first_name) == 0:
print("first_name is empty: position:" + str(count)) print("first_name is empty: position:" + str(count))
print(name) print(name)
if len(last_name) == 0: if len(last_name) == 0:
print("last_name is empty: position:" + str(count)) print("last_name is empty: position:" + str(count))
count = count + 1 count = count + 1
contact_list.append(contact) contact_list.append(contact)
return contact_list return contact_list
def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list: def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list:
email_info_in_json = pandas.read_excel(file_name).to_json(orient='records') email_info_in_json = pandas.read_excel(file_name).to_json(orient='records')
contact_dict_list = json.loads(email_info_in_json) contact_dict_list = json.loads(email_info_in_json)
contact_list = [] contact_list = []
count = 0 count = 0
for contact_dict in contact_dict_list: for contact_dict in contact_dict_list:
if contact_dict['email']: if contact_dict['email']:
email = contact_dict['email'].strip() email = contact_dict['email'].strip()
password = contact_dict['code'] password = contact_dict['code']
email_destinaire = MailAddress(email, password) email_destinaire = MailAddress(email, password)
count = count + 1 count = count + 1
contact_list.append(email_destinaire) contact_list.append(email_destinaire)
return contact_list return contact_list
def get_random_phone_numbers(): def get_random_phone_numbers():
length = 8 # number of characters in the string. length = 8 # number of characters in the string.
ran = ''.join(random.choices(string.digits, k=length)) ran = ''.join(random.choices(string.digits, k=length))
id_number = random.choice(phone_number_prefix) + str(ran) id_number = random.choice(phone_number_prefix) + str(ran)
return id_number return id_number
def generate_email_from_name(first_name: str, last_name: str) -> str: def generate_email_from_name(first_name: str, last_name: str) -> str:
length = 2 # number of characters in the string. length = 2 # number of characters in the string.
ran = ''.join(random.choices(string.digits, k=length)) ran = ''.join(random.choices(string.digits, k=length))
separator = ['.', '_', ''] separator = ['.', '_', '']
domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com', domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com',
'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com'] 'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com']
email = "{}{}{}{}@{}".format(last_name.lower(), random.choice(separator), email = "{}{}{}{}@{}".format(last_name.lower(), random.choice(separator),
first_name.replace("-", "").replace("'", "").lower(), ran, first_name.replace("-", "").replace("'", "").lower(), ran,
random.choice(domains)) random.choice(domains))
print(email) print(email)
return email return email
def get_random_id_number() -> str: def get_random_id_number() -> str:
# write_the_valid_profiles_to_excel() # write_the_valid_profiles_to_excel()
S = 8 # number of characters in the string. S = 8 # number of characters in the string.
# call random.choices() string module to find the string in Uppercase + numeric data. # call random.choices() string module to find the string in Uppercase + numeric data.
ran = ''.join(random.choices(string.digits, k=S)) ran = ''.join(random.choices(string.digits, k=S))
print("The randomly generated string is : 94" + str(ran)) # print the random data print("The randomly generated string is : 94" + str(ran)) # print the random data
return ran return ran
def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True): def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True):
row = 0 row = 0
col = 0 col = 0
# Create a workbook and add a worksheet. # Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts)))
header_data = ['name', 'phone', 'passport', 'email'] header_data = ['name', 'phone', 'passport', 'email']
worksheet = workbook.add_worksheet() worksheet = workbook.add_worksheet()
header_format = workbook.add_format({'bold': True}) header_format = workbook.add_format({'bold': True})
for col_num, data in enumerate(header_data): for col_num, data in enumerate(header_data):
worksheet.write(row, col_num, data, header_format) worksheet.write(row, col_num, data, header_format)
row = row + 1 row = row + 1
for info in valid_contacts: for info in valid_contacts:
info.phone = get_random_phone_numbers() info.phone = get_random_phone_numbers()
info.passport = get_random_passport_id_number() info.passport = get_random_passport_id_number()
info.mail = generate_email_from_name(info.first_name, info.last_name) info.mail = generate_email_from_name(info.first_name, info.last_name)
# Iterate over the data and write it out row by row. # Iterate over the data and write it out row by row.
worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name))
worksheet.write(row, col + 1, info.phone) worksheet.write(row, col + 1, info.phone)
worksheet.write(row, col + 2, info.passport) worksheet.write(row, col + 2, info.passport)
worksheet.write(row, col + 3, info.mail) worksheet.write(row, col + 3, info.mail)
row += 1 row += 1
workbook.close() workbook.close()
def write_destinaire_email(valid_contacts: list, generate_passport=True): def write_destinaire_email(valid_contacts: list, generate_passport=True):
row = 0 row = 0
col = 0 col = 0
# Create a workbook and add a worksheet. # Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts)))
header_data = ['name', 'phone', 'passport', 'email'] header_data = ['name', 'phone', 'passport', 'email']
worksheet = workbook.add_worksheet() worksheet = workbook.add_worksheet()
header_format = workbook.add_format({'bold': True}) header_format = workbook.add_format({'bold': True})
for col_num, data in enumerate(header_data): for col_num, data in enumerate(header_data):
worksheet.write(row, col_num, data, header_format) worksheet.write(row, col_num, data, header_format)
row = row + 1 row = row + 1
for info in valid_contacts: for info in valid_contacts:
info.phone = get_random_phone_numbers() info.phone = get_random_phone_numbers()
info.passport = get_random_passport_id_number() info.passport = get_random_passport_id_number()
info.mail = generate_email_from_name(info.first_name, info.last_name) info.mail = generate_email_from_name(info.first_name, info.last_name)
# Iterate over the data and write it out row by row. # Iterate over the data and write it out row by row.
worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name))
worksheet.write(row, col + 1, info.phone) worksheet.write(row, col + 1, info.phone)
worksheet.write(row, col + 2, info.passport) worksheet.write(row, col + 2, info.passport)
worksheet.write(row, col + 3, info.mail) worksheet.write(row, col + 3, info.mail)
row += 1 row += 1
workbook.close() workbook.close()
def save_mails_to_db(): def save_mails_to_db():
excel_reader = ExcelHelper() excel_reader = ExcelHelper()
emails = excel_reader.read_email_pojo("/Users/lpan/Downloads/aol_mails_21.xlsx") emails = excel_reader.read_email_pojo("/Users/lpan/Downloads/aol_mails_21.xlsx")
print(emails) print(emails)
for mail in emails: for mail in emails:
MONGO_STORE_MANAGER.save_destinary_emails(mail) MONGO_STORE_MANAGER.save_destinary_emails(mail)
if __name__ == '__main__': if __name__ == '__main__':
excel_reader = ExcelHelper() excel_reader = ExcelHelper()
excel_reader.read_user_agens() excel_reader.read_user_agens()
# contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx") # contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx")
# print(contacts) # print(contacts)
# write_new_contacts_to_excel(valid_contacts=contacts) # write_new_contacts_to_excel(valid_contacts=contacts)
# save_mails_to_db() # save_mails_to_db()
# for mail in excel_reader.read_mails_and_pwd(): # for mail in excel_reader.read_mails_and_pwd():
# MONGO_STORE_MANAGER.insert_email(mail) # MONGO_STORE_MANAGER.insert_email(mail)
# for i in range(1, 64): # for i in range(1, 64):
# print(get_random_phone_numbers()) # print(get_random_phone_numbers())
+461 -410
View File
@@ -1,410 +1,461 @@
import datetime import datetime
import logging import logging
import random import random
import re import re
import sys import sys
import threading import threading
import time import time
import traceback import traceback
from typing import Union from typing import Union
from src import params, definitions from anticaptchaofficial.antigatetask import antigateTask
from src.db.mongo_manager import MONGO_STORE_MANAGER from playwright._impl._api_structures import SetCookieParam
from src.pojo.ModeEnum import ModeEnum from playwright_stealth import stealth_sync
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
from src.pojo.contact_pojo import ContactPojo from src import params, definitions
from src.proxy.proxy_type import ProxyType from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.workers.SolveCaptch import SolveCaptcha from src.pojo.ModeEnum import ModeEnum
from src.workers.TlsPlaywright import TlsPlaywright from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
from src.pojo.contact_pojo import ContactPojo
RDV_URL = "https://rendezvousparis.hermes.com/client/register" from src.proxy.proxy_type import ProxyType
from src.workers.SolveCaptch import SolveCaptcha
# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" from src.workers.TlsPlaywright import TlsPlaywright
# RDV_URL = "https://api.ipify.org"
# RDV_URL ="https://bot.sannysoft.com/" RDV_URL = "https://rendezvousparis.hermes.com/client/register"
REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
otp_value = None # RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html"
OTP_FIELD_ID = "#sms_code" # RDV_URL = "https://api.ipify.org"
MESSAGE_FIELD_CLASS = ".message" # RDV_URL ="https://bot.sannysoft.com/"
BLANK_URL = "about:blank" REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered" otp_value = None
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions." OTP_FIELD_ID = "#sms_code"
MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail." MESSAGE_FIELD_CLASS = ".message"
DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today." BLANK_URL = "about:blank"
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui." CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests" CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes" MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail."
CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again" DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today."
CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué" DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
TIME_OUT = 10 * 60 * 1000 # 10 mins TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests"
OTP_TIMEOUT = 240 TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes"
PAGE_TIMEOUT = 40000 CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again"
CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué"
TIME_OUT = 10 * 60 * 1000 # 10 mins
def get_random_wait_time() -> float: OTP_TIMEOUT = 240
wait_time = random.randint(0, 10) / 10.0 * 1 PAGE_TIMEOUT = 40000
return wait_time
def get_random_wait_time() -> float:
class CommandorPage:
    """Drive one appointment-request browser session for a single contact.

    The session opens ``RDV_URL`` in a Playwright WebKit browser emulating a
    mobile device, fills the request form from the contact's data, optionally
    solves the captcha, and publishes the outcome to the stores and log sender.

    NOTE(review): this class was rebuilt from a flattened side-by-side diff in
    which indentation was lost. Block nesting was inferred from syntax; the
    spots where more than one nesting is plausible are marked NOTE(review).
    """

    tls = TlsPlaywright()

    # Init script that hides automation markers. ``add_init_script`` evaluates
    # its source verbatim, so the function must invoke itself (IIFE) to run.
    # ``configurable: true`` lets the script be registered more than once
    # without the second definition throwing.
    _HIDE_AUTOMATION_SCRIPT = """(() => {
        Object.defineProperty(navigator, 'webdriver', {get: () => undefined, configurable: true});
        Object.defineProperty(navigator, 'platform', {get: () => "iPhone", configurable: true});
    })();
    """

    def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA,
                 mode: ModeEnum = ModeEnum.MANUAL, headless=False):
        """Store the session settings; no browser is started here.

        :param contact: person the appointment is requested for (its ``phone`` is normalised in place)
        :param store_type: 0 leaves the preferred store untouched, 1-3 select an entry of ``store_map``
        :param proxy_type: proxy provider used when a browser start has to be retried
        :param mode: ``ModeEnum.AUTOMATIC`` also triggers captcha solving after the form is filled
        :param headless: forwarded to the Playwright browser launch
        """
        self.otp_value = None
        self.is_finished = False
        self.current_context = None
        # Created by start_browser(); initialised here so cleanup can test for them.
        self.browser = None
        self.page = None
        self.contact = contact
        # Remove the trailing ".0" left when Excel exported the number as a float.
        phone = str(self.contact.phone)
        if phone.endswith(".0"):
            phone = phone[:-2]
        self.contact.phone = phone
        self.logger = logging.getLogger("约会页面:" + str(self.contact.phone))
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.appointment_mode = mode
        # store_type values:
        # 0: random
        # 1: faubourg
        # 2: George
        # 3: Sèvres
        self.store_map = {
            1: "faubourg",
            2: "georgev",
            3: "sevres",
        }
        self.store_type = store_type

    def _close_browser(self):
        """Close the browser if one was launched (safe when launch itself failed)."""
        if self.browser is not None:
            self.browser.close()

    def on_success(self, result: ReserveResultPojo):
        """Mark the session finished and send the success event at most once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            self.logger.info(result)
            params.oracle_log_sender.send_appoint_result(result)
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this contact and close the browser."""
        params.oracle_log_sender.send_timeout_log(self.contact)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Start a browser (retrying with fresh proxies) and wait for the OTP field."""
        self.logger.info("will start browser")
        self.on_success_listener = on_success
        # reset otp_value to None
        self.otp_value = None
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            # Next attempt, if any, uses a newly fetched proxy.
            proxy = params.get_proxy(self.proxy_type)
        otp_input = self.page.locator(OTP_FIELD_ID)
        try:
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
        finally:
            # wait_for raises when the timeout elapses; close the browser on
            # that path too instead of leaking it.
            self.logger.info("timeout")
            self.termine()

    def fill_fields(self):
        """Fill the request form from the contact; ignored while a fill is already running."""
        if self.is_filling_fields:
            return
        self.is_filling_fields = True
        try:
            self.logger.info("填充信息: " + str(self.contact.phone))
            self._set_name(self.contact.last_name, self.contact.first_name)
            self._setPhoneCountryAndStore()
            self._set_phone_number("0" + str(self.contact.phone))
            self._set_email(self.contact.mail)
            self._set_id_number(self.contact.passport)
            self._checkCgu()
            if self.appointment_mode == ModeEnum.AUTOMATIC:
                self.resolve_captcha()
        finally:
            # Always release the guard, otherwise one failure blocks every later fill.
            self.is_filling_fields = False

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit emulating ``device`` and open the request page.

        :return: the page HTML on success, ``None`` when anything failed
                 (the error is reported and the browser closed)
        """
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            simulated_mobile = pwright.devices[device]
            userAgent = simulated_mobile['user_agent']
            print("user_agent is " + userAgent)
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.current_context = context
            self.create_and_config_page(context)
            return self.page.content()
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            traceback.print_exc()
            self.logger.exception(error)
            self.logger.info("will close browser")
            self._close_browser()
            return None

    def create_and_config_page(self, context):
        """Create the page in ``context``, apply stealth patches, hook events and open ``RDV_URL``."""
        self.page = context.new_page()
        stealth_sync(self.page)
        # hide webdriver information
        self.page.add_init_script(self._HIDE_AUTOMATION_SCRIPT)
        self.page.on("load", self._on_page_loaded)
        self.page.on("response", self.handle_response)
        self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT)

    def handle_response(self, response):
        """Log every response whose URL matches ``REGEX_RDV_URL``."""
        if re.match(REGEX_RDV_URL, response.url):
            self.logger.info("result url found: " + response.url)
            # self.publish_message_to_queue(self.contact, PublishType.PENDING, response.url)

    def start_page(self, proxy):
        """Entry point: run the whole session starting with ``proxy``."""
        self._run(proxy)

    def solve_datadome_captcha(self):
        """Ask the anti-captcha service to bypass the anti-bot screen and reuse its cookie."""
        print("solve_datadome_captcha")
        solver = antigateTask()
        solver.set_verbose(1)
        # NOTE(review): API key committed in source; move it to configuration and rotate it.
        solver.set_key("ede6a69396fc961af351e7c8ffda9059")
        solver.set_website_url(RDV_URL)
        solver.set_template_name("Anti-bot screen bypass")
        solver.set_variables({
            "css_selector": ".captcha__human__container"
        })
        result = solver.solve_and_return_solution()
        if result != 0:
            cookies = result["cookies"]
            localStorage = result["localStorage"]
            fingerprint = result["fingerprint"]
            url = result["url"]
            domain = result["domain"]
            print("cookies: ", cookies)
            print("localStorage: ", localStorage)
            print("fingerprint: ", fingerprint)
            print("url: " + url)
            print("domain: " + domain)
            # add cookies to playwright
            cookie_list = [SetCookieParam(name='datadome', value=cookies['datadome'], url=url)]
            self.page.context.add_cookies(cookie_list)
            self.config_page_with_fingerprint(fingerprint)
            self.page.reload()
        else:
            # f-string: error_code is not guaranteed to be a str.
            print(f"task finished with error {solver.error_code}")

    def _on_page_loaded(self):
        """Page "load" handler: fill the form on the request page, then inspect the outcome."""
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        captcha_url = "geo.captcha-delivery.com/captcha"
        if captcha_url in self.page.content():
            # Anti-bot captcha page: nothing is done here (solve_datadome_captcha is not wired in).
            return
        if self.page.url == RDV_URL:
            self.fill_fields()
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message or MESSAGE_URL_VALIDATION_FR in message:
                # publish the successful message
                self.publish_message_to_queue(self.contact, PublishType.SUCCESS, self.page.url)
            # NOTE(review): also runs after a success was published; confirm that is intended.
            self.get_errors()
        except Exception as error:
            self.logger.error(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook fired."""
        self.logger.info("on_document_loaded called")

    def _setPhoneCountryAndStore(self):
        """Set the phone country to FR and, when ``store_type`` is not 0, the preferred store."""
        try:
            if self.store_type == 0:
                self.page.evaluate("""()=>{
                        //document.getElementById("phone_country").focus();
                        document.getElementById("phone_country").value = \"FR\" }""")
            else:
                store_to_choose = self.store_map[self.store_type]
                self.page.evaluate("""(store_to_choose)=>{
                        document.getElementById("prefer").value = store_to_choose;
                        //document.getElementById("phone_country").focus();
                        document.getElementById("phone_country").value = \"FR\" }""", store_to_choose)
        except Exception as error:
            self.logger.error(error)

    def _set_phone_number(self, phoneNumber):
        """Write ``phoneNumber`` into the phone field after a short random pause."""
        time.sleep(get_random_wait_time())
        try:
            self.page.evaluate("""(phoneNumber)=>document.getElementById("phone_number").value =phoneNumber""",
                               phoneNumber)
        except Exception as error:
            self.logger.error(error)

    def _set_name(self, lastName, firstName):
        """Fill surname and first name, but only while the surname field is still empty."""
        time.sleep(get_random_wait_time())
        try:
            self.page.evaluate("""(name)=> {
                    let surname = document.getElementById("surname");
                    if(surname.value.length == 0){
                    // surname.focus();
                    surname.value = name.lastName;
                    document.getElementById("name").focus();
                    document.getElementById("name").value = name.firstName
                    }}
                """, {'lastName': lastName, 'firstName': firstName})
        except Exception as error:
            self.logger.error(error)

    def get_errors(self):
        """Publish an ERROR result for unexpected URLs and handle any alert shown on the page."""
        if self.page.url == BLANK_URL:
            # no need to push blank url to db
            return
        if self.page.url != RDV_URL:
            # no need to push RDV url to db
            self.publish_message_to_queue(self.contact, PublishType.ERROR, self.page.url)
        try:
            items = self.page.query_selector("div.alert")
            if items:
                erro_content = items.inner_html()
                self.logger.info("错误:" + erro_content)
                self._handle_errors(erro_content)
        except Exception as ext:
            self.logger.error(ext)

    def _handle_errors(self, erro_content: str):
        """React to a known error banner; each case is handled once and ends the session.

        NOTE(review): ``termine()`` is placed inside the ``is_finished`` guards;
        the original nesting was lost, confirm.
        """
        if DOUBLE_REQUEST_ERROR_MESSAGE in erro_content or DOUBLE_REQUEST_ERROR_MESSAGE_FR in erro_content:
            # a request with this data has already been validated
            if not self.is_finished:
                params.oracle_log_sender.send_double_data_error(self.contact)
                MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
                self.is_finished = True
                self.termine()
        elif TOO_MANY_REQUEST_ERROR_MESSAGE in erro_content or TOO_MANY_REQUEST_ERROR_MESSAGE_FR in erro_content:
            # too many requests: the contact goes to the black list
            if not self.is_finished:
                params.oracle_log_sender.send_too_many_error(self.contact)
                MONGO_STORE_MANAGER.insert_blacklist_contact(self.contact)
                self.is_finished = True
                self.termine()
        elif CAPTCHA_ERROR_MESSAGE in erro_content or CAPTCHA_ERROR_MESSAGE_FR in erro_content:
            # captcha verification failed
            self.is_captcha_in_error = True
            if not self.is_finished:
                # save the error to database with contact info
                self.handle_captcha_error()
                self.is_finished = True
                # no need to retry captcha, if retry ,will generate DOUBLE_REQUEST_ERROR_MESSAGE
                self.termine()

    def _set_email(self, email):
        """Fill the e-mail field, but only while it is still empty."""
        time.sleep(get_random_wait_time())
        try:
            self.page.evaluate("""(email)=>{
                    let emailElement = document.getElementById("email")
                    if(emailElement.value.length == 0){
                    emailElement.focus();
                    document.getElementById("email").value = email;}}""", email)
        except Exception as error:
            self.logger.error(error)

    def _set_id_number(self, id):
        """Focus the passport field and write ``id`` into it."""
        time.sleep(get_random_wait_time())
        try:
            self.page.evaluate(""" (id) =>{
                    document.getElementById("passport_id").focus();
                    document.getElementById("passport_id").value = id}""", id)
        except Exception as error:
            self.logger.error(error)

    def _checkCgu(self):
        """Tick the "cgu" and "processing" checkboxes."""
        try:
            self.page.evaluate("""
                    document.getElementById("cgu").focus();
                    document.getElementById("cgu").checked = true;
                    document.getElementById("processing").focus();
                    document.getElementById("processing").checked = true""")
        except Exception as error:
            self.logger.error(error)

    def clickOnValidBtn(self):
        """Focus, then click, the first element with class ``btn``, with random pauses."""
        time.sleep(get_random_wait_time())
        try:
            self.page.evaluate("""document.getElementsByClassName("btn")[0].focus();""")
            time.sleep(get_random_wait_time())
            self.page.evaluate("""
                    document.getElementsByClassName("btn")[0].click();""")
        except Exception as error:
            self.logger.error(error)

    def fill_otp(self, otp: str):
        """Type ``otp`` into the OTP field."""
        self.page.focus(OTP_FIELD_ID)
        time.sleep(get_random_wait_time())
        self.page.fill(OTP_FIELD_ID, otp)

    def termine(self):
        """Close the browser after a one-second pause."""
        self.logger.info("will close browser")
        time.sleep(1)
        self._close_browser()

    def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str):
        """Persist the outcome for ``contact`` and close the browser.

        The result id is the last path segment of ``url``.
        """
        result_id = url.split("/")[-1]
        result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url,
                                   firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,
                                   passport=contact.passport, ccid=contact.ccid)
        result.id = result_id
        result.store_type = self.store_type
        definitions.firebase_store_manager.save(result)
        collection_name = str(datetime.date.today())
        MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result)
        MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
        MONGO_STORE_MANAGER.remove_contact_from_black_list(self.contact)
        if status is PublishType.SUCCESS:
            self.on_success(result)
        # NOTE(review): the close is applied for every status; the original nesting was lost, confirm.
        time.sleep(2)
        self._close_browser()

    def resolve_captcha(self):
        """Start the captcha solver; it calls back ``fill_captcha_solution`` with the token."""
        self.captcha_solver = SolveCaptcha(self.page)
        self.captcha_solver.start(self.fill_captcha_solution)

    def fill_captcha_solution(self, solution):
        """Inject the captcha token and submit; reload the page if anything fails."""
        self.logger.info("will input solution")
        try:
            self.page.evaluate("""(solution)=>{
                    document.getElementById("g-recaptcha-response").innerHTML=solution;}""", solution)
            self.logger.info("will click on valid btn")
            self.clickOnValidBtn()
            # wait for 20s
            time.sleep(20)
            if not self.is_finished:
                if not self.is_captcha_in_error:
                    # still on the form and no captcha error reported: submit again
                    self.clickOnValidBtn()
                else:
                    self.is_captcha_in_error = False
        except Exception as error:
            self.logger.error(error)
            self.page.reload(timeout=PAGE_TIMEOUT)

    def handle_captcha_error(self):
        """Record the captcha failure for this contact in the store and the log sender."""
        MONGO_STORE_MANAGER.insert_captcha_error_contact(self.contact)
        params.oracle_log_sender.send_captcha_error(self.contact)

    def config_page_with_fingerprint(self, fingerprint):
        """Re-register the automation-hiding init script.

        ``fingerprint`` is accepted but not used by the current implementation.
        """
        self.page.add_init_script(self._HIDE_AUTOMATION_SCRIPT)
def on_success(result: ReserveResultPojo):
    """Default success listener: intentionally does nothing."""
    pass
def launch_page():
    """Run one session with a hard-coded sample contact (manual testing helper)."""
    contact = ContactPojo(phone_number="+33758912245", passport_number="82546975", last_name="XU",
                          first_name="xingzhen",
                          mail="ColbyPatel653@gmail.com", ccid="", position=0)
    page = CommandorPage(contact, store_type=1)
    return page.start_page(params.get_proxy(ProxyType.BRIGHT_DATA))
def wait_for_otp(event: threading.Event, commandor: CommandorPage):
    """Read an OTP from stdin, store it on ``commandor`` and signal ``event``."""
    sec = input("Press Enter otp to continue...\n")
    print("input otp is: " + sec)
    commandor.otp_value = sec
    event.set()
if __name__ == '__main__':
    # Manual run against the sample contact. The commented-out proxy check that
    # used to follow was removed: it was dead code and embedded proxy credentials.
    launch_page()
+135 -135
View File
@@ -1,135 +1,135 @@
import logging
import random
import sys
import time
import traceback
from typing import Union

from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ReserveResultPojo import PublishType
from src.proxy.proxy_type import ProxyType
from src.workers.TlsPlaywright import TlsPlaywright
OTP_FIELD_ID = "#sms_code"

# Playwright timeouts, in milliseconds.
TIME_OUT = 10 * 60 * 1000  # 10 mins
PAGE_TIMEOUT = 40000

# Sentences looked up in the validation page to classify the outcome.
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci"
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
class LinkValidator:
    """Open a validation link in a Playwright WebKit browser and record the outcome.

    NOTE(review): this class was rebuilt from a flattened side-by-side diff in
    which indentation was lost; block nesting was inferred from syntax.
    """

    tls = TlsPlaywright()

    # Init script that hides automation markers. ``add_init_script`` evaluates
    # its source verbatim, so the function must invoke itself (IIFE) to run.
    _HIDE_AUTOMATION_SCRIPT = """(() => {
        Object.defineProperty(navigator, 'webdriver', {get: () => undefined, configurable: true});
        Object.defineProperty(navigator, 'platform', {get: () => "iPhone", configurable: true});
    })();
    """

    def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False):
        """Store the settings; no browser is started here.

        :param link: validation URL to open
        :param proxy_type: proxy provider used when a browser start has to be retried
        :param headless: forwarded to the Playwright browser launch
        """
        self.is_finished = False
        self.link = link
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        # Created by start_browser(); initialised here so cleanup can test for them.
        self.browser = None
        self.page = None
        self.logger = logging.getLogger("LinkValidator")

    def _close_browser(self):
        """Close the browser if one was launched (safe when launch itself failed)."""
        if self.browser is not None:
            self.browser.close()

    def on_success(self):
        """Mark the validation finished and send the result event at most once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            params.oracle_log_sender.send_url_validation_result()
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this link and close the browser."""
        params.oracle_log_sender.send_timeout_log(self.link)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Start a browser (retrying with fresh proxies), then wait on the OTP field."""
        self.logger.info("will start browser")
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            # Next attempt, if any, uses a newly fetched proxy.
            proxy = params.get_proxy(self.proxy_type)
        otp_input = self.page.locator(OTP_FIELD_ID)
        try:
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
        finally:
            # wait_for raises when the timeout elapses; close the browser on
            # that path too instead of leaking it.
            self.logger.info("timeout")
            self.termine()

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit emulating ``device`` and open the link.

        :return: the page HTML, or ``None`` when the anti-bot captcha page was
                 served or an error occurred (the browser is closed in both cases)
        """
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            self.logger.info("模拟设备: " + device)
            simulated_mobile = pwright.devices[device]
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.page = context.new_page()
            # hide webdriver information
            self.page.add_init_script(self._HIDE_AUTOMATION_SCRIPT)
            self.page.on("load", self._on_page_loaded)
            self.page.goto(self.link, timeout=PAGE_TIMEOUT)
            captcha_url = "geo.captcha-delivery.com/captcha"
            content = self.page.content()
            if captcha_url in content:
                self.logger.info("will close browser")
                self.browser.close()
                return None
            return content
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            traceback.print_exc()
            self.logger.exception(error)
            self.logger.info("will close browser")
            self._close_browser()
            return None

    def start_page(self, proxy, headless=False):
        """Entry point: set the headless flag and run the validation with ``proxy``."""
        self.headless = headless
        self._run(proxy)

    def _on_page_loaded(self):
        """Page "load" handler: classify the page by its text and publish the outcome."""
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message:
                # request confirmed
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif SORRY_SENTENCE_FR in message:
                # the "sorry" answer is also recorded as a validated link
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message:
                # a request with the same data was already validated
                self.publish_message_to_queue(PublishType.DUPLICATED)
        except Exception as error:
            self.logger.error(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook fired."""
        self.logger.info("on_document_loaded called")

    def _handle_errors(self, erro_content: str):
        """Placeholder: no error handling is implemented for link validation."""
        pass

    def termine(self):
        """Close the browser after a one-second pause."""
        self.logger.info("will close browser")
        time.sleep(1)
        self._close_browser()

    def publish_message_to_queue(self, status: PublishType):
        """Record the validation state for the current URL, notify, and close the browser."""
        if status is PublishType.SUCCESS:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url)
        else:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name)
        self.on_success()
        time.sleep(2)
        self._close_browser()