Try playwright-stealth

This commit is contained in:
Lei PAN
2022-09-01 16:22:39 +02:00
parent b99737e7e4
commit 7bcae96a8f
5 changed files with 1011 additions and 958 deletions
+17 -15
View File
@@ -1,16 +1,18 @@
dataclasses_json==0.5.7
firebase_admin==5.2.0
pandas~=1.3.5
playwright==1.23.0
dataclasses~=0.6
pymongo==4.1.1
oci~=2.54.1
XlsxWriter~=3.0.3
boto3~=1.21.13
openpyxl==3.0.9
google-cloud-firestore==2.4.0
PySimpleGUI==4.60.1
SQLAlchemy~=1.4.37
requests~=2.27.1
sqlalchemy_utils~=0.38.2
dataclasses_json==0.5.7
firebase_admin==5.2.0
pandas~=1.3.5
playwright==1.25.0
dataclasses~=0.6
pymongo==4.1.1
anticaptchaofficial==1.0.43
oci~=2.54.1
XlsxWriter~=3.0.3
playwright-stealth==1.0.5
boto3~=1.21.13
openpyxl==3.0.9
google-cloud-firestore==2.4.0
PySimpleGUI==4.60.1
SQLAlchemy~=1.4.37
requests~=2.27.1
sqlalchemy_utils~=0.38.2
Mako~=1.2.0
+173 -173
View File
@@ -1,173 +1,173 @@
import datetime
import email
import imaplib
import re
from concurrent.futures import ThreadPoolExecutor
from email.header import decode_header
from email.message import Message
from builtins import list
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.mail.mail_pojo import MailPojo, MailAddress
from src.proxy.proxy_type import ProxyType
from src.workers.link_validator import LinkValidator
# IMAP hosts for the two supported mailbox providers.
AOL_IMAP_SERVER = "imap.aol.com"
IMAP_SERVER_163 = "imap.163.com"
# Subject used both as the IMAP search criterion and as the local filter.
VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous'
# Raw string: the previous literal relied on the invalid "\/" escape sequence.
# The host dots are escaped so they only match a literal ".". The "." before
# "code=" is deliberately left as a wildcard, exactly as before (presumably it
# stands in for the "?" of the query string -- confirm).
VALIDATION_URL_REGEX = r"https://rendezvousparis\.hermes\.com/client/register/[A-Z0-9]+/validate.code=[A-Z0-9]+"
HERMES_EMAIL = "no-reply@hermes.com"
# Logins containing this domain are read through IMAP_SERVER_163.
DOMAIN_163 = "163.com"
# strftime pattern for the IMAP SINCE criterion: zero-padded DD-Mon-YYYY, e.g. 03-Mar-2014
date_format = "%d-%b-%Y"
class MailReader():
    """Read appointment-validation e-mails from one IMAP mailbox.

    The IMAP host is derived from the login: logins containing ``DOMAIN_163``
    use ``IMAP_SERVER_163``, every other login uses ``AOL_IMAP_SERVER``.
    """

    def __init__(self, login, password):
        self.login = login
        self.password = password

    @staticmethod
    def show_folders(imap):
        """Print each folder returned by ``imap.list()`` as ``<flags> = <name>``."""
        for raw_folder in imap.list()[1]:
            fields = raw_folder.decode().split(' "/" ')
            print(fields[0] + " = " + fields[1])

    @staticmethod
    def _decode_header_value(raw_value, fallback_charset="utf-8") -> str:
        """Return the first decoded chunk of a header as ``str``.

        ``decode_header`` may return bytes together with a ``None`` (or
        unknown) charset; those are decoded with ``fallback_charset`` instead
        of raising. A missing header yields ``""``.

        NOTE(review): only the first chunk is used, as in the previous code.
        """
        if raw_value is None:
            return ""
        value, charset = decode_header(raw_value)[0]
        if not isinstance(value, bytes):
            return value
        try:
            return value.decode(charset or fallback_charset, errors="replace")
        except LookupError:
            # The header declares a charset Python has no codec for.
            return value.decode(fallback_charset, errors="replace")

    @staticmethod
    def _extract_body(message) -> str:
        """Concatenate the decoded payload of every leaf part of ``message``.

        Each part is decoded with its declared charset, then UTF-8, then
        ISO-8859-1 (which accepts any byte sequence), so one badly labelled
        part can no longer abort the whole message.
        """
        chunks = []
        for part in message.walk():
            if part.is_multipart():
                # Containers carry no text of their own; walk() yields their children.
                continue
            payload = part.get_payload(decode=True)
            if not payload:
                continue
            for charset in (part.get_content_charset(), "utf-8"):
                if charset is None:
                    continue
                try:
                    chunks.append(payload.decode(charset))
                    break
                except (LookupError, UnicodeDecodeError):
                    continue
            else:
                chunks.append(payload.decode("iso-8859-1"))
        return "".join(chunks)

    def read_emails(self, mails_messages: list) -> list:
        """Fetch today's validation mails and append them to ``mails_messages``.

        :param mails_messages: shared list that receives the mails that were found.
        :return: the mails found in this mailbox.
        """
        host = IMAP_SERVER_163 if DOMAIN_163 in self.login else AOL_IMAP_SERVER
        # create an IMAP4 class with SSL
        imap = imaplib.IMAP4_SSL(host)
        mail_list = []
        try:
            imap.login(self.login, self.password)
            print("read mails from {}".format(self.login))
            # Only the inbox is read. Other folder names seen on AOL:
            # "Archive", "Bulk" (junk), "Draft", "Sent", "Trash".
            mail_list.extend(self._get_messages_from_folder(imap))
            imap.close()
        finally:
            # Always end the session, even when login/search/fetch failed.
            try:
                imap.logout()
            except (imaplib.IMAP4.error, OSError) as error:
                print(error)
        mails_messages.extend(mail_list)
        return mail_list

    def _get_messages_from_folder(self, imap, folder="INBOX") -> list:
        """Return a ``MailPojo`` for each mail of ``folder`` received since today
        whose subject contains ``VALIDATION_URL_SUBJECT``."""
        imap.select(folder)
        since = datetime.datetime.today().strftime(date_format)
        criteria = '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT, since)
        typ, data = imap.search(None, criteria)
        if typ != "OK" or not data or not data[0]:
            return []
        mail_messages = []
        for message_id in data[0].split():
            # fetch the email message by ID
            res, fetched = imap.fetch(message_id.decode("utf-8"), "(RFC822)")
            for response in fetched:
                if not isinstance(response, tuple):
                    continue
                # parse a bytes email into a message object
                message = email.message_from_bytes(response[1])
                subject = self._decode_header_value(message["Subject"])
                from_address = self._decode_header_value(message.get("From"))
                print("From:", from_address)
                print("Subject:", subject)
                body = self._extract_body(message)
                if not message.is_multipart():
                    print(body)
                if VALIDATION_URL_SUBJECT in subject:
                    mail_messages.append(
                        MailPojo(subject=subject, body=body, from_address=from_address))
        return mail_messages
def clean(text):
    """Return ``text`` with every non-alphanumeric character replaced by ``_``.

    Used to derive a name that is safe for creating a folder.
    """
    sanitized = [char if char.isalnum() else "_" for char in text]
    return "".join(sanitized)
def need_to_valid_url(url: str, successful_items) -> bool:
    """Tell whether the validation link ``url`` still has to be opened.

    The request id is the sixth ``/``-separated segment of the URL
    (``https://host/client/register/<id>/...``) and must be 6 characters long.

    :param url: validation URL extracted from a mail body.
    :param successful_items: objects exposing ``id`` and ``url_validated``.
    :return: ``True`` when an item with that id exists and its
        ``url_validated`` is falsy (``None`` included); ``False`` when the id
        is malformed, unknown, or already validated.
    """
    print("url is :" + url)
    parts = url.split('/')
    # A URL with fewer than six segments used to raise IndexError here.
    request_id = parts[5] if len(parts) > 5 else ''
    if len(request_id) != 6:
        print("id not valid:{}".format(request_id))
        return False
    for item in successful_items:
        if item.id == request_id:
            # None means "never validated", so it needs validation as well.
            return not item.url_validated
    # Ids that are not among the successful items are never validated.
    return False
def read_mails():
    """Read every destination mailbox and open the pending validation links.

    Phase 1 reads all mailboxes in parallel; phase 2 extracts the validation
    URL from each mail and submits a ``LinkValidator`` for those that
    ``need_to_valid_url`` reports as still pending.
    """
    # Mailbox credentials come from the database only. Never paste real
    # addresses/passwords here, not even in commented-out code.
    mail_list = MONGO_STORE_MANAGER.get_destination_emails()
    successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
    mails_messages = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        read_jobs = [
            executor.submit(MailReader(mail.mail, mail.password).read_emails, mails_messages)
            for mail in mail_list
        ]
    # The pool has been joined; report failures instead of dropping them silently.
    for job in read_jobs:
        error = job.exception()
        if error is not None:
            print("could not read mailbox: {}".format(error))

    url_pattern = re.compile(VALIDATION_URL_REGEX)
    validation_jobs = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        for mail in mails_messages:
            match = url_pattern.search(mail.body)
            if not match:
                continue
            url = match.group(0)
            # check whether the url has already been clicked
            if need_to_valid_url(url, successful_items):
                url_validator = LinkValidator(url)
                print("need to validate url: " + url)
                validation_jobs.append(
                    executor.submit(url_validator.start_page,
                                    params.get_proxy(ProxyType.BRIGHT_DATA), True))
            else:
                # NOTE(review): MailPojo is built with from_address in this file;
                # confirm that it really exposes mail_address.
                print("do not need to click url --> {}".format(mail.mail_address))
    for job in validation_jobs:
        error = job.exception()
        if error is not None:
            print("could not validate url: {}".format(error))
# Script entry point: read all destination mailboxes once and validate pending links.
if __name__ == '__main__':
    read_mails()
import datetime
import email
import imaplib
import re
from concurrent.futures import ThreadPoolExecutor
from email.header import decode_header
from email.message import Message
from builtins import list
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.mail.mail_pojo import MailPojo, MailAddress
from src.proxy.proxy_type import ProxyType
from src.workers.link_validator import LinkValidator
# IMAP hosts for the two supported mailbox providers.
AOL_IMAP_SERVER = "imap.aol.com"
IMAP_SERVER_163 = "imap.163.com"
# Subject used both as the IMAP search criterion and as the local filter.
VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous'
# Raw string: the previous literal relied on the invalid "\/" escape sequence.
# The host dots are escaped so they only match a literal ".". The "." before
# "code=" is deliberately left as a wildcard, exactly as before (presumably it
# stands in for the "?" of the query string -- confirm).
VALIDATION_URL_REGEX = r"https://rendezvousparis\.hermes\.com/client/register/[A-Z0-9]+/validate.code=[A-Z0-9]+"
HERMES_EMAIL = "no-reply@hermes.com"
# Logins containing this domain are read through IMAP_SERVER_163.
DOMAIN_163 = "163.com"
# strftime pattern for the IMAP SINCE criterion: zero-padded DD-Mon-YYYY, e.g. 03-Mar-2014
date_format = "%d-%b-%Y"
class MailReader():
    """Read appointment-validation e-mails from one IMAP mailbox.

    The IMAP host is derived from the login: logins containing ``DOMAIN_163``
    use ``IMAP_SERVER_163``, every other login uses ``AOL_IMAP_SERVER``.
    """

    def __init__(self, login, password):
        self.login = login
        self.password = password

    @staticmethod
    def show_folders(imap):
        """Print each folder returned by ``imap.list()`` as ``<flags> = <name>``."""
        for raw_folder in imap.list()[1]:
            fields = raw_folder.decode().split(' "/" ')
            print(fields[0] + " = " + fields[1])

    @staticmethod
    def _decode_header_value(raw_value, fallback_charset="utf-8") -> str:
        """Return the first decoded chunk of a header as ``str``.

        ``decode_header`` may return bytes together with a ``None`` (or
        unknown) charset; those are decoded with ``fallback_charset`` instead
        of raising. A missing header yields ``""``.

        NOTE(review): only the first chunk is used, as in the previous code.
        """
        if raw_value is None:
            return ""
        value, charset = decode_header(raw_value)[0]
        if not isinstance(value, bytes):
            return value
        try:
            return value.decode(charset or fallback_charset, errors="replace")
        except LookupError:
            # The header declares a charset Python has no codec for.
            return value.decode(fallback_charset, errors="replace")

    @staticmethod
    def _extract_body(message) -> str:
        """Concatenate the decoded payload of every leaf part of ``message``.

        Each part is decoded with its declared charset, then UTF-8, then
        ISO-8859-1 (which accepts any byte sequence), so one badly labelled
        part can no longer abort the whole message.
        """
        chunks = []
        for part in message.walk():
            if part.is_multipart():
                # Containers carry no text of their own; walk() yields their children.
                continue
            payload = part.get_payload(decode=True)
            if not payload:
                continue
            for charset in (part.get_content_charset(), "utf-8"):
                if charset is None:
                    continue
                try:
                    chunks.append(payload.decode(charset))
                    break
                except (LookupError, UnicodeDecodeError):
                    continue
            else:
                chunks.append(payload.decode("iso-8859-1"))
        return "".join(chunks)

    def read_emails(self, mails_messages: list) -> list:
        """Fetch today's validation mails and append them to ``mails_messages``.

        :param mails_messages: shared list that receives the mails that were found.
        :return: the mails found in this mailbox.
        """
        host = IMAP_SERVER_163 if DOMAIN_163 in self.login else AOL_IMAP_SERVER
        # create an IMAP4 class with SSL
        imap = imaplib.IMAP4_SSL(host)
        mail_list = []
        try:
            imap.login(self.login, self.password)
            print("read mails from {}".format(self.login))
            # Only the inbox is read. Other folder names seen on AOL:
            # "Archive", "Bulk" (junk), "Draft", "Sent", "Trash".
            mail_list.extend(self._get_messages_from_folder(imap))
            imap.close()
        finally:
            # Always end the session, even when login/search/fetch failed.
            try:
                imap.logout()
            except (imaplib.IMAP4.error, OSError) as error:
                print(error)
        mails_messages.extend(mail_list)
        return mail_list

    def _get_messages_from_folder(self, imap, folder="INBOX") -> list:
        """Return a ``MailPojo`` for each mail of ``folder`` received since today
        whose subject contains ``VALIDATION_URL_SUBJECT``."""
        imap.select(folder)
        since = datetime.datetime.today().strftime(date_format)
        criteria = '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT, since)
        typ, data = imap.search(None, criteria)
        if typ != "OK" or not data or not data[0]:
            return []
        mail_messages = []
        for message_id in data[0].split():
            # fetch the email message by ID
            res, fetched = imap.fetch(message_id.decode("utf-8"), "(RFC822)")
            for response in fetched:
                if not isinstance(response, tuple):
                    continue
                # parse a bytes email into a message object
                message = email.message_from_bytes(response[1])
                subject = self._decode_header_value(message["Subject"])
                from_address = self._decode_header_value(message.get("From"))
                print("From:", from_address)
                print("Subject:", subject)
                body = self._extract_body(message)
                if not message.is_multipart():
                    print(body)
                if VALIDATION_URL_SUBJECT in subject:
                    mail_messages.append(
                        MailPojo(subject=subject, body=body, from_address=from_address))
        return mail_messages
def clean(text):
    """Return ``text`` with every non-alphanumeric character replaced by ``_``.

    Used to derive a name that is safe for creating a folder.
    """
    sanitized = [char if char.isalnum() else "_" for char in text]
    return "".join(sanitized)
def need_to_valid_url(url: str, successful_items) -> bool:
    """Tell whether the validation link ``url`` still has to be opened.

    The request id is the sixth ``/``-separated segment of the URL
    (``https://host/client/register/<id>/...``) and must be 6 characters long.

    :param url: validation URL extracted from a mail body.
    :param successful_items: objects exposing ``id`` and ``url_validated``.
    :return: ``True`` when an item with that id exists and its
        ``url_validated`` is falsy (``None`` included); ``False`` when the id
        is malformed, unknown, or already validated.
    """
    print("url is :" + url)
    parts = url.split('/')
    # A URL with fewer than six segments used to raise IndexError here.
    request_id = parts[5] if len(parts) > 5 else ''
    if len(request_id) != 6:
        print("id not valid:{}".format(request_id))
        return False
    for item in successful_items:
        if item.id == request_id:
            # None means "never validated", so it needs validation as well.
            return not item.url_validated
    # Ids that are not among the successful items are never validated.
    return False
def read_mails():
    """Read every destination mailbox and open the pending validation links.

    Phase 1 reads all mailboxes in parallel; phase 2 extracts the validation
    URL from each mail and submits a ``LinkValidator`` for those that
    ``need_to_valid_url`` reports as still pending.
    """
    # Mailbox credentials come from the database only. Never paste real
    # addresses/passwords here, not even in commented-out code.
    mail_list = MONGO_STORE_MANAGER.get_destination_emails()
    successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
    mails_messages = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        read_jobs = [
            executor.submit(MailReader(mail.mail, mail.password).read_emails, mails_messages)
            for mail in mail_list
        ]
    # The pool has been joined; report failures instead of dropping them silently.
    for job in read_jobs:
        error = job.exception()
        if error is not None:
            print("could not read mailbox: {}".format(error))

    url_pattern = re.compile(VALIDATION_URL_REGEX)
    validation_jobs = []
    with ThreadPoolExecutor(max_workers=20) as executor:
        for mail in mails_messages:
            match = url_pattern.search(mail.body)
            if not match:
                continue
            url = match.group(0)
            # check whether the url has already been clicked
            if need_to_valid_url(url, successful_items):
                url_validator = LinkValidator(url)
                print("need to validate url: " + url)
                validation_jobs.append(
                    executor.submit(url_validator.start_page,
                                    params.get_proxy(ProxyType.BRIGHT_DATA), True))
            else:
                # NOTE(review): MailPojo is built with from_address in this file;
                # confirm that it really exposes mail_address.
                print("do not need to click url --> {}".format(mail.mail_address))
    for job in validation_jobs:
        error = job.exception()
        if error is not None:
            print("could not validate url: {}".format(error))
# Script entry point: read all destination mailboxes once and validate pending links.
if __name__ == '__main__':
    read_mails()
+225 -225
View File
@@ -1,225 +1,225 @@
import json
import random
import string
import pandas as pandas
import xlsxwriter
from src.config import CONTACT_LIST_FILE
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.contact_pojo import ContactPojo
from src.pojo.mail.mail_pojo import MailAddress
from src.utils.generate_random_passport_id import get_random_passport_id_number
# Candidate leading digits for generated phone numbers; get_random_phone_numbers
# picks one and appends 8 random digits.
phone_number_prefix = ['6']
class ExcelHelper:
    """Read contacts, mailbox credentials and user agents from Excel workbooks.

    Also accumulates values in an internal pandas Series that
    ``write_to_exel`` dumps to a workbook.
    """

    def __init__(self):
        # Explicit dtype: an empty Series without one triggers a pandas
        # DeprecationWarning and defaults to float64.
        self._df = pandas.Series(dtype=object)

    @staticmethod
    def _read_records(file_name) -> list:
        """Load a workbook as a list of row dicts.

        The rows deliberately go through a JSON round trip: ``to_json`` turns
        empty cells (NaN) into ``null``, so they come back as ``None`` and the
        ``if row[...]`` checks in the readers skip them.
        """
        return json.loads(pandas.read_excel(file_name).to_json(orient='records'))

    @staticmethod
    def _name_parts(raw_name) -> list:
        """Split a "LAST First ..." cell on spaces, falling back to
        non-breaking spaces when a plain space yields a single token."""
        stripped = raw_name.strip()
        parts = stripped.split(' ')
        if len(parts) == 1:
            parts = stripped.split('\xa0')
        return parts

    @staticmethod
    def _is_wanted_user_agent(user_agent) -> bool:
        """Keep Mozilla user agents for Android 5.1 that do not mention Mac OS."""
        return (isinstance(user_agent, str)
                and 'Mozilla' in user_agent
                and 'Android 5.1' in user_agent
                and 'Mac OS' not in user_agent)

    def write_to_exel(self, file_name, data_list: list):
        """Append ``data_list`` to the accumulated Series and write it all to ``file_name``."""
        new_df = pandas.Series(data_list)
        self._df = pandas.concat([self._df, new_df])
        self._df.to_excel(file_name)

    def read_user_agens(
            self,
            file_name="/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx"
    ) -> list:
        """Print and return the user agents of ``file_name`` accepted by
        ``_is_wanted_user_agent`` (read from the ``user_agent`` column).

        The list used to be printed only, despite the ``-> list`` annotation.
        """
        user_agent_list = [
            row['user_agent'] for row in self._read_records(file_name)
            if self._is_wanted_user_agent(row['user_agent'])
        ]
        print(user_agent_list)
        return user_agent_list

    def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build a ``ContactPojo`` for every row that has a ``name`` cell.

        The first name token is the last name; the remaining tokens are joined
        without separator to form the first name.
        """
        contact_list = []
        for row in self._read_records(file_name):
            if not row['name']:
                continue
            parts = self._name_parts(row['name'])
            contact_list.append(ContactPojo(phone_number=row['phone'],
                                            last_name=parts[0],
                                            first_name=''.join(parts[1:]),
                                            passport_number=row['passport'],
                                            mail=row['email']))
        return contact_list

    def read_mails_and_pwd(self,
                           file_name='/Users/lpan/Desktop/163.xlsx'):
        """Build a ``MailAddress`` from the ``mail``/``password`` columns of every row with a mail."""
        contact_list = []
        for row in self._read_records(file_name):
            if row['mail']:
                contact_list.append(MailAddress(row['mail'].strip(), row['password']))
        return contact_list

    def read_names(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build name-only ``ContactPojo`` objects and print rows whose name is incomplete.

        The reported position is the spreadsheet row (the first data row is row 2).
        """
        contact_list = []
        for position, row in enumerate(self._read_records(file_name), start=2):
            if not row['name']:
                continue
            parts = self._name_parts(row['name'])
            if len(parts) == 1:
                print("error in " + str(parts))
            last_name = parts[0]
            first_name = ''.join(parts[1:])
            if len(first_name) == 0:
                print("first_name is empty: position:" + str(position))
                print(parts)
            if len(last_name) == 0:
                print("last_name is empty: position:" + str(position))
            contact_list.append(ContactPojo(phone_number="",
                                            last_name=last_name,
                                            first_name=first_name,
                                            passport_number="",
                                            mail=""))
        return contact_list

    def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build a ``MailAddress`` from the ``email``/``code`` columns of every row with an email."""
        contact_list = []
        for row in self._read_records(file_name):
            if row['email']:
                contact_list.append(MailAddress(row['email'].strip(), row['code']))
        return contact_list
def get_random_phone_numbers():
    """Return a random 9-digit phone number string: one prefix from
    ``phone_number_prefix`` followed by 8 random digits."""
    digit_count = 8
    digits = random.choices(string.digits, k=digit_count)
    prefix = random.choice(phone_number_prefix)
    return prefix + ''.join(digits)
def generate_email_from_name(first_name: str, last_name: str) -> str:
    """Build, print and return a random e-mail address derived from a name.

    Layout: ``<last><separator><first><2 digits>@<domain>``, lower-cased, with
    ``-`` and ``'`` removed from the first name only.
    """
    suffix = ''.join(random.choices(string.digits, k=2))
    separator = ['.', '_', '']
    # Repeated entries make those domains proportionally more likely.
    domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com',
               'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com']
    chosen_separator = random.choice(separator)
    cleaned_first = first_name.replace("-", "").replace("'", "").lower()
    chosen_domain = random.choice(domains)
    email = last_name.lower() + chosen_separator + cleaned_first + suffix + "@" + chosen_domain
    print(email)
    return email
def get_random_id_number() -> str:
    """Return 8 random digits as a string.

    The value is printed with a ``94`` prefix, but the prefix is not part of
    the returned string.
    """
    digit_count = 8
    generated = ''.join(random.choices(string.digits, k=digit_count))
    print("The randomly generated string is : 94" + generated)
    return generated
def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True):
    """Give every contact random data and write them to ``real_contacts_<n>.xlsx``.

    Each contact is mutated in place: ``phone`` and ``mail`` are always
    regenerated; ``passport`` is regenerated only when ``generate_passport`` is
    true (the flag used to be ignored).

    :param valid_contacts: objects exposing ``first_name``, ``last_name``,
        ``phone``, ``passport`` and ``mail``.
    :param generate_passport: when false, keep each contact's current passport.
    """
    # Create a workbook and add a worksheet.
    workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts)))
    worksheet = workbook.add_worksheet()
    header_format = workbook.add_format({'bold': True})
    for col_num, title in enumerate(['name', 'phone', 'passport', 'email']):
        worksheet.write(0, col_num, title, header_format)
    # Data rows start right below the header row.
    for row, info in enumerate(valid_contacts, start=1):
        info.phone = get_random_phone_numbers()
        if generate_passport:
            info.passport = get_random_passport_id_number()
        info.mail = generate_email_from_name(info.first_name, info.last_name)
        worksheet.write(row, 0, "{} {}".format(info.last_name, info.first_name))
        worksheet.write(row, 1, info.phone)
        worksheet.write(row, 2, info.passport)
        worksheet.write(row, 3, info.mail)
    workbook.close()
def write_destinaire_email(valid_contacts: list, generate_passport=True):
    """Write the contacts to ``real_contacts_<n>.xlsx``.

    This function was a line-for-line copy of ``write_new_contacts_to_excel``;
    it now delegates to it so the two cannot drift apart.
    """
    write_new_contacts_to_excel(valid_contacts, generate_passport=generate_passport)
def save_mails_to_db(file_name="/Users/lpan/Downloads/aol_mails_21.xlsx"):
    """Read the mailbox credentials of ``file_name`` and store each one in MongoDB.

    :param file_name: workbook read through ``ExcelHelper.read_email_pojo``;
        defaults to the previously hard-coded path.
    """
    emails = ExcelHelper().read_email_pojo(file_name)
    print(emails)
    for mail in emails:
        MONGO_STORE_MANAGER.save_destinary_emails(mail)
# Manual entry point: currently only prints the filtered user-agent list.
# The commented-out lines are earlier one-off maintenance runs.
if __name__ == '__main__':
    excel_reader = ExcelHelper()
    excel_reader.read_user_agens()
    # contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx")
    # print(contacts)
    # write_new_contacts_to_excel(valid_contacts=contacts)
    # save_mails_to_db()
    # for mail in excel_reader.read_mails_and_pwd():
    # MONGO_STORE_MANAGER.insert_email(mail)
    # for i in range(1, 64):
    # print(get_random_phone_numbers())
import json
import random
import string
import pandas as pandas
import xlsxwriter
from src.config import CONTACT_LIST_FILE
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.contact_pojo import ContactPojo
from src.pojo.mail.mail_pojo import MailAddress
from src.utils.generate_random_passport_id import get_random_passport_id_number
# Candidate leading digits for generated phone numbers; get_random_phone_numbers
# picks one and appends 8 random digits.
phone_number_prefix = ['6']
class ExcelHelper:
    """Read contacts, mailbox credentials and user agents from Excel workbooks.

    Also accumulates values in an internal pandas Series that
    ``write_to_exel`` dumps to a workbook.
    """

    def __init__(self):
        # Explicit dtype: an empty Series without one triggers a pandas
        # DeprecationWarning and defaults to float64.
        self._df = pandas.Series(dtype=object)

    @staticmethod
    def _read_records(file_name) -> list:
        """Load a workbook as a list of row dicts.

        The rows deliberately go through a JSON round trip: ``to_json`` turns
        empty cells (NaN) into ``null``, so they come back as ``None`` and the
        ``if row[...]`` checks in the readers skip them.
        """
        return json.loads(pandas.read_excel(file_name).to_json(orient='records'))

    @staticmethod
    def _name_parts(raw_name) -> list:
        """Split a "LAST First ..." cell on spaces, falling back to
        non-breaking spaces when a plain space yields a single token."""
        stripped = raw_name.strip()
        parts = stripped.split(' ')
        if len(parts) == 1:
            parts = stripped.split('\xa0')
        return parts

    @staticmethod
    def _is_wanted_user_agent(user_agent) -> bool:
        """Keep Mozilla user agents for Android 5.1 that do not mention Mac OS."""
        return (isinstance(user_agent, str)
                and 'Mozilla' in user_agent
                and 'Android 5.1' in user_agent
                and 'Mac OS' not in user_agent)

    def write_to_exel(self, file_name, data_list: list):
        """Append ``data_list`` to the accumulated Series and write it all to ``file_name``."""
        new_df = pandas.Series(data_list)
        self._df = pandas.concat([self._df, new_df])
        self._df.to_excel(file_name)

    def read_user_agens(
            self,
            file_name="/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx"
    ) -> list:
        """Print and return the user agents of ``file_name`` accepted by
        ``_is_wanted_user_agent`` (read from the ``user_agent`` column).

        The list used to be printed only, despite the ``-> list`` annotation.
        """
        user_agent_list = [
            row['user_agent'] for row in self._read_records(file_name)
            if self._is_wanted_user_agent(row['user_agent'])
        ]
        print(user_agent_list)
        return user_agent_list

    def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build a ``ContactPojo`` for every row that has a ``name`` cell.

        The first name token is the last name; the remaining tokens are joined
        without separator to form the first name.
        """
        contact_list = []
        for row in self._read_records(file_name):
            if not row['name']:
                continue
            parts = self._name_parts(row['name'])
            contact_list.append(ContactPojo(phone_number=row['phone'],
                                            last_name=parts[0],
                                            first_name=''.join(parts[1:]),
                                            passport_number=row['passport'],
                                            mail=row['email']))
        return contact_list

    def read_mails_and_pwd(self,
                           file_name='/Users/lpan/Desktop/163.xlsx'):
        """Build a ``MailAddress`` from the ``mail``/``password`` columns of every row with a mail."""
        contact_list = []
        for row in self._read_records(file_name):
            if row['mail']:
                contact_list.append(MailAddress(row['mail'].strip(), row['password']))
        return contact_list

    def read_names(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build name-only ``ContactPojo`` objects and print rows whose name is incomplete.

        The reported position is the spreadsheet row (the first data row is row 2).
        """
        contact_list = []
        for position, row in enumerate(self._read_records(file_name), start=2):
            if not row['name']:
                continue
            parts = self._name_parts(row['name'])
            if len(parts) == 1:
                print("error in " + str(parts))
            last_name = parts[0]
            first_name = ''.join(parts[1:])
            if len(first_name) == 0:
                print("first_name is empty: position:" + str(position))
                print(parts)
            if len(last_name) == 0:
                print("last_name is empty: position:" + str(position))
            contact_list.append(ContactPojo(phone_number="",
                                            last_name=last_name,
                                            first_name=first_name,
                                            passport_number="",
                                            mail=""))
        return contact_list

    def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list:
        """Build a ``MailAddress`` from the ``email``/``code`` columns of every row with an email."""
        contact_list = []
        for row in self._read_records(file_name):
            if row['email']:
                contact_list.append(MailAddress(row['email'].strip(), row['code']))
        return contact_list
def get_random_phone_numbers():
    """Return a random 9-digit phone number string: one prefix from
    ``phone_number_prefix`` followed by 8 random digits."""
    digit_count = 8
    digits = random.choices(string.digits, k=digit_count)
    prefix = random.choice(phone_number_prefix)
    return prefix + ''.join(digits)
def generate_email_from_name(first_name: str, last_name: str) -> str:
    """Build, print and return a random e-mail address derived from a name.

    Layout: ``<last><separator><first><2 digits>@<domain>``, lower-cased, with
    ``-`` and ``'`` removed from the first name only.
    """
    suffix = ''.join(random.choices(string.digits, k=2))
    separator = ['.', '_', '']
    # Repeated entries make those domains proportionally more likely.
    domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com',
               'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com']
    chosen_separator = random.choice(separator)
    cleaned_first = first_name.replace("-", "").replace("'", "").lower()
    chosen_domain = random.choice(domains)
    email = last_name.lower() + chosen_separator + cleaned_first + suffix + "@" + chosen_domain
    print(email)
    return email
def get_random_id_number() -> str:
    """Return 8 random digits as a string.

    The value is printed with a ``94`` prefix, but the prefix is not part of
    the returned string.
    """
    digit_count = 8
    generated = ''.join(random.choices(string.digits, k=digit_count))
    print("The randomly generated string is : 94" + generated)
    return generated
def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True):
    """Give every contact random data and write them to ``real_contacts_<n>.xlsx``.

    Each contact is mutated in place: ``phone`` and ``mail`` are always
    regenerated; ``passport`` is regenerated only when ``generate_passport`` is
    true (the flag used to be ignored).

    :param valid_contacts: objects exposing ``first_name``, ``last_name``,
        ``phone``, ``passport`` and ``mail``.
    :param generate_passport: when false, keep each contact's current passport.
    """
    # Create a workbook and add a worksheet.
    workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts)))
    worksheet = workbook.add_worksheet()
    header_format = workbook.add_format({'bold': True})
    for col_num, title in enumerate(['name', 'phone', 'passport', 'email']):
        worksheet.write(0, col_num, title, header_format)
    # Data rows start right below the header row.
    for row, info in enumerate(valid_contacts, start=1):
        info.phone = get_random_phone_numbers()
        if generate_passport:
            info.passport = get_random_passport_id_number()
        info.mail = generate_email_from_name(info.first_name, info.last_name)
        worksheet.write(row, 0, "{} {}".format(info.last_name, info.first_name))
        worksheet.write(row, 1, info.phone)
        worksheet.write(row, 2, info.passport)
        worksheet.write(row, 3, info.mail)
    workbook.close()
def write_destinaire_email(valid_contacts: list, generate_passport=True):
    """Write the contacts to ``real_contacts_<n>.xlsx``.

    This function was a line-for-line copy of ``write_new_contacts_to_excel``;
    it now delegates to it so the two cannot drift apart.
    """
    write_new_contacts_to_excel(valid_contacts, generate_passport=generate_passport)
def save_mails_to_db(file_name="/Users/lpan/Downloads/aol_mails_21.xlsx"):
    """Read the mailbox credentials of ``file_name`` and store each one in MongoDB.

    :param file_name: workbook read through ``ExcelHelper.read_email_pojo``;
        defaults to the previously hard-coded path.
    """
    emails = ExcelHelper().read_email_pojo(file_name)
    print(emails)
    for mail in emails:
        MONGO_STORE_MANAGER.save_destinary_emails(mail)
# Manual entry point: currently only prints the filtered user-agent list.
# The commented-out lines are earlier one-off maintenance runs.
if __name__ == '__main__':
    excel_reader = ExcelHelper()
    excel_reader.read_user_agens()
    # contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx")
    # print(contacts)
    # write_new_contacts_to_excel(valid_contacts=contacts)
    # save_mails_to_db()
    # for mail in excel_reader.read_mails_and_pwd():
    # MONGO_STORE_MANAGER.insert_email(mail)
    # for i in range(1, 64):
    # print(get_random_phone_numbers())
+461 -410
View File
@@ -1,410 +1,461 @@
import datetime
import logging
import random
import re
import sys
import threading
import time
import traceback
from typing import Union
from src import params, definitions
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ModeEnum import ModeEnum
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
from src.pojo.contact_pojo import ContactPojo
from src.proxy.proxy_type import ProxyType
from src.workers.SolveCaptch import SolveCaptcha
from src.workers.TlsPlaywright import TlsPlaywright
RDV_URL = "https://rendezvousparis.hermes.com/client/register"
# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html"
# RDV_URL = "https://api.ipify.org"
# RDV_URL ="https://bot.sannysoft.com/"
# Raw string: same runtime value as before, without the invalid "\/" and "\."
# escape sequences of a normal string literal.
REGEX_RDV_URL = r"https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
otp_value = None
# Selectors used on the registration page.
OTP_FIELD_ID = "#sms_code"
MESSAGE_FIELD_CLASS = ".message"
BLANK_URL = "about:blank"
# Texts searched for in the page to classify the outcome of a request.
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail."
DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today."
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests"
TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes"
CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again"
CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué"
TIME_OUT = 10 * 60 * 1000  # 10 mins, in milliseconds
OTP_TIMEOUT = 240
PAGE_TIMEOUT = 40000  # passed as Playwright launch/goto timeout
def get_random_wait_time() -> float:
    """Return a random delay in seconds between 0.0 and 1.0, in steps of 0.1."""
    tenths = random.randint(0, 10)
    return tenths / 10.0 * 1
class CommandorPage:
tls = TlsPlaywright()
def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA,
             mode: ModeEnum = ModeEnum.MANUAL, headless=False):
    """Prepare one appointment attempt for ``contact``.

    :param contact: person to register; its ``phone`` is normalised in place.
    :param store_type: 0 keeps the page's own store choice, 1-3 select a
        store through ``store_map``.
    :param proxy_type: proxy family used when a new proxy is requested.
    :param mode: ``ModeEnum.AUTOMATIC`` makes ``fill_fields`` solve the captcha itself.
    :param headless: forwarded to the browser launch.
    """
    self.otp_value = None
    self.is_finished = False
    self.contact = contact
    # Remove the ".0" left by a numeric Excel cell. Only a trailing ".0" is
    # dropped (the old replace() removed every occurrence), and str() makes
    # this safe when the phone arrives as a number.
    phone = str(self.contact.phone)
    if phone.endswith(".0"):
        phone = phone[:-2]
    self.contact.phone = phone
    self.logger = logging.getLogger("约会页面:" + str(self.contact.phone))
    self.proxy_type = proxy_type
    self.is_event_sent = False
    self.is_captcha_in_error = False
    self.is_filling_fields = False
    self.headless = headless
    self.appointment_mode = mode
    # store_type values:
    # 0: random
    # 1: faubourg
    # 2: George
    # 3: Sèvres
    self.store_map = {
        1: "faubourg",
        2: "georgev",
        3: "sevres"
    }
    self.store_type = store_type
def on_success(self, result: ReserveResultPojo):
    """Mark the attempt as finished and send ``result`` to the log sender once."""
    self.logger.info("on_success called.")
    self.is_finished = True
    if self.is_event_sent:
        # The result was already reported; never send it twice.
        return
    self.logger.info("will send successful event")
    self.logger.info(result)
    params.oracle_log_sender.send_appoint_result(result)
    self.is_event_sent = True
def timeout_occurred(self):
    """Report a timeout for this contact, then shut the attempt down via ``termine``."""
    timed_out_contact = self.contact
    params.oracle_log_sender.send_timeout_log(timed_out_contact)
    self.logger.info("will close timeout modem")
    self.termine()
def _run(self, proxy):
    """Open the registration page (retrying until it loads) and wait for the OTP field.

    NOTE(review): the nesting below is reconstructed from a listing that lost
    its indentation -- confirm against the real file.
    """
    self.logger.info("will start browser")
    # NOTE(review): `on_success` is resolved as a module-level name here; only a
    # method of that name is visible in this part of the file -- confirm it is
    # defined elsewhere or whether `self.on_success` was meant.
    self.on_success_listener = on_success
    # reset otp_value to None
    self.otp_value = None
    devices = random.choice(params.DEVICES)
    first_page = None
    # start_browser returns None on failure, so this retries without an upper
    # bound, fetching a fresh proxy after every attempt.
    while first_page is None:
        first_page = self.start_browser(proxy, self.tls.playwright, devices)
        proxy = params.get_proxy(self.proxy_type)
    # self.thread_event = e
    otp_input = self.page.locator(OTP_FIELD_ID)
    # Blocks for up to TIME_OUT ms until the OTP field is visible.
    otp_input.wait_for(state='visible', timeout=TIME_OUT)
    self.logger.info("timeout")
    self.termine()
def fill_fields(self):
    """Fill the registration form with the contact's data.

    ``is_filling_fields`` guards against re-entrant calls. It is now reset in
    a ``finally`` block; previously an exception in any step left it set
    forever, so no later page load could fill the form again.
    """
    if self.is_filling_fields:
        return
    self.is_filling_fields = True
    try:
        self.logger.info("填充信息: " + str(self.contact.phone))
        self._set_name(self.contact.last_name, self.contact.first_name)
        self._setPhoneCountryAndStore()
        # The form gets the number with a leading 0 in front of the stored digits.
        self._set_phone_number("0" + str(self.contact.phone))
        self._set_email(self.contact.mail)
        self._set_id_number(self.contact.passport)
        self._checkCgu()
        if self.appointment_mode == ModeEnum.AUTOMATIC:
            self.resolve_captcha()
    finally:
        self.is_filling_fields = False
def start_browser(self, proxy, pwright, device) -> Union[str, None]:
    """Launch Firefox through ``proxy`` and open the registration page.

    :param proxy: proxy settings forwarded to the browser launch.
    :param pwright: Playwright object providing ``firefox`` and ``devices``.
    :param device: key into ``pwright.devices``; only its user agent is used.
    :return: the page HTML, or ``None`` when anything failed (the browser is
        closed in that case).
    """
    # Reset first so the error path never closes a stale or missing browser.
    self.browser = None
    try:
        self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
        simulated_mobile = pwright.devices[device]
        userAgent = simulated_mobile['user_agent']
        print("user_agent is " + userAgent)
        context = self.browser.new_context(user_agent=userAgent, locale='fr-FR')
        self.page = context.new_page()
        # hide webdriver information
        # The script is an immediately-invoked function: a bare "() => {...}"
        # only defines a function and never runs it.
        self.page.add_init_script("""(() => {
    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
    Object.defineProperty(navigator, 'platform', {
        get: () => {
            return "iPhone";
        }});
})();
""")
        self.page.on("load", self._on_page_loaded)
        self.page.on("response", self.handle_response)
        self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT)
        return self.page.content()
    except Exception as error:
        params.oracle_log_sender.send_error(str(error))
        # print_exc() takes (limit, file, chain); passing sys.exc_info() to it
        # raised TypeError inside this handler and skipped the cleanup below.
        traceback.print_exc()
        self.logger.exception(error)
        self.logger.info("will close browser")
        if self.browser is not None:
            self.browser.close()
        return None
def handle_response(self, response):
    """Log every network response whose URL starts with the result-URL pattern."""
    matched = re.match(REGEX_RDV_URL, response.url)
    if matched:
        self.logger.info("result url found: " + response.url)
    # Publishing a PENDING result from here is currently disabled:
    # self.publish_message_to_queue(self.contact, PublishType.PENDING, response.url)
def start_page(self, proxy):
    """Run one booking session through ``proxy`` by delegating to ``_run``.

    Returns ``None``; the previously created ``threading.Event`` was never
    used and has been dropped.
    """
    self._run(proxy)
def _on_page_loaded(self):
# Callback for the page "load" event (registered in start_browser).
# Logs the current URL; when the captcha-delivery marker is absent from the HTML
# it fills the form if the page is still RDV_URL, publishes a SUCCESS result when
# one of the French confirmation sentences is present, and calls get_errors().
# Any exception raised in the try block is logged and swallowed.
# NOTE(review): indentation is missing in this view, so the exact nesting of the
# try block and of get_errors() relative to the `if` statements is unconfirmed.
# time.sleep(40000)
# Log text below means "page finished loading".
self.logger.info("页面加载完毕")
self.logger.info("url is " + self.page.url)
captcha_url = "geo.captcha-delivery.com/captcha"
if captcha_url not in self.page.content():
if self.page.url == RDV_URL:
self.fill_fields()
try:
message = self.page.content()
if CONFIRMED_MESSAGE_FR in message or MESSAGE_URL_VALIDATION_FR in message:
# publish the successful message
self.publish_message_to_queue(self.contact, PublishType.SUCCESS, self.page.url)
self.get_errors()
except Exception as error:
self.logger.error(error)
def on_document_loaded(self):
    """Log that the document-loaded hook was invoked; nothing else happens."""
    log = self.logger
    log.info("on_document_loaded called")
def _setPhoneCountryAndStore(self):
    """Set the phone country to "FR" and, for a non-zero store type, the preferred store.

    Store type 0 leaves the "prefer" element untouched. Any exception
    (including an unknown store type) is logged and swallowed.
    """
    try:
        if self.store_type != 0:
            preferred_store = self.store_map[self.store_type]
            self.page.evaluate("""(store_to_choose)=>{
document.getElementById("prefer").value = store_to_choose;
//document.getElementById("phone_country").focus();
document.getElementById("phone_country").value = \"FR\" }""", preferred_store)
            return
        self.page.evaluate("""()=>{
//document.getElementById("phone_country").focus();
document.getElementById("phone_country").value = \"FR\" }""")
    except Exception as error:
        self.logger.error(error)
def _set_phone_number(self, phoneNumber):
    """Pause briefly, then write ``phoneNumber`` into the element with id "phone_number".

    Evaluation errors are logged and swallowed.
    """
    pause = get_random_wait_time()
    time.sleep(pause)
    script = """(phoneNumber)=>document.getElementById("phone_number").value =phoneNumber"""
    try:
        self.page.evaluate(script, phoneNumber)
    except Exception as error:
        self.logger.error(error)
def _set_name(self, lastName, firstName):
    """Pause briefly, then fill "surname" and "name" if the surname field is empty.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    name_payload = {'lastName': lastName, 'firstName': firstName}
    script = """(name)=> {
let surname = document.getElementById("surname");
if(surname.value.length == 0){
// surname.focus();
surname.value = name.lastName;
document.getElementById("name").focus();
document.getElementById("name").value = name.firstName
}}
"""
    try:
        self.page.evaluate(script, name_payload)
    except Exception as error:
        self.logger.error(error)
def get_errors(self):
# Publishes an ERROR result when the page URL is neither BLANK_URL nor RDV_URL,
# then looks for a "div.alert" element and forwards its inner HTML to _handle_errors().
# Exceptions raised while querying the page are logged and swallowed.
# NOTE(review): indentation is missing in this view; whether the try block sits
# inside the first `if` cannot be confirmed from here.
# send error result
if self.page.url != BLANK_URL:
# no need to push blank url to db
if self.page.url != RDV_URL:
# no need to push RDV url to db
self.publish_message_to_queue(self.contact, PublishType.ERROR, self.page.url)
try:
items = self.page.query_selector("div.alert")
if items:
erro_content = items.inner_html()
# Log prefix below means "Error:".
self.logger.info("错误:" + erro_content)
self._handle_errors(erro_content)
except Exception as ext:
self.logger.error(ext)
def _handle_errors(self, erro_content: str):
# Classifies the alert HTML (English or French wording) into one of three cases:
# duplicate request, too many requests, or captcha failure. Each case reports to
# params.oracle_log_sender and/or MONGO_STORE_MANAGER, sets is_finished and calls termine().
# NOTE(review): indentation is missing in this view; whether termine() runs only
# when is_finished was False, or unconditionally per branch, is unconfirmed.
if DOUBLE_REQUEST_ERROR_MESSAGE in erro_content or DOUBLE_REQUEST_ERROR_MESSAGE_FR in erro_content:
# this email has been already used
if not self.is_finished:
params.oracle_log_sender.send_double_data_error(self.contact)
MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
self.is_finished = True
self.termine()
elif TOO_MANY_REQUEST_ERROR_MESSAGE in erro_content or TOO_MANY_REQUEST_ERROR_MESSAGE_FR in erro_content:
# this email is in black list
if not self.is_finished:
params.oracle_log_sender.send_too_many_error(self.contact)
MONGO_STORE_MANAGER.insert_blacklist_contact(self.contact)
self.is_finished = True
self.termine()
elif CAPTCHA_ERROR_MESSAGE in erro_content or CAPTCHA_ERROR_MESSAGE_FR in erro_content:
# the site reported a captcha verification failure
self.is_captcha_in_error = True
if not self.is_finished:
# save the error to database with contact info
self.handle_captcha_error()
self.is_finished = True
# no need to retry captcha, if retry ,will generate DOUBLE_REQUEST_ERROR_MESSAGE
self.termine()
# self.resolve_captcha()
def _set_email(self, email):
    """Pause briefly, then fill the "email" element if it is currently empty.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    script = """(email)=>{
let emailElement = document.getElementById("email")
if(emailElement.value.length == 0){
emailElement.focus();
document.getElementById("email").value = email;}}"""
    try:
        self.page.evaluate(script, email)
    except Exception as error:
        self.logger.error(error)
def _set_id_number(self, id):
    """Pause briefly, then focus the "passport_id" element and set its value to ``id``.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    script = """ (id) =>{
document.getElementById("passport_id").focus();
document.getElementById("passport_id").value = id}"""
    try:
        self.page.evaluate(script, id)
    except Exception as error:
        self.logger.error(error)
def _checkCgu(self):
    """Tick the "cgu" and "processing" checkboxes via an in-page script.

    Evaluation errors are logged and swallowed.
    """
    script = """
document.getElementById("cgu").focus();
document.getElementById("cgu").checked = true;
document.getElementById("processing").focus();
document.getElementById("processing").checked = true"""
    try:
        self.page.evaluate(script)
    except Exception as error:
        self.logger.error(error)
def clickOnValidBtn(self):
    """Focus, then click, the first element with class "btn", pausing before each step.

    Evaluation errors are logged and swallowed.
    """
    focus_script = """document.getElementsByClassName("btn")[0].focus();"""
    click_script = """
document.getElementsByClassName("btn")[0].click();"""
    time.sleep(get_random_wait_time())
    try:
        self.page.evaluate(focus_script)
        time.sleep(get_random_wait_time())
        self.page.evaluate(click_script)
    except Exception as error:
        self.logger.error(error)
def fill_otp(self, otp: str):
    """Focus the OTP field, pause briefly, then fill it with ``otp``."""
    page = self.page
    page.focus(OTP_FIELD_ID)
    time.sleep(get_random_wait_time())
    page.fill(OTP_FIELD_ID, otp)
def termine(self):
    """Log the shutdown, wait one second, then close the browser."""
    log = self.logger
    log.info("will close browser")
    time.sleep(1)
    self.browser.close()
def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str):
# Builds a ReserveResultPojo for `contact` (id = last path segment of `url`), saves it
# through definitions.firebase_store_manager and MONGO_STORE_MANAGER (collection named
# after today's date), and removes self.contact from the captcha-error and black lists.
# On PublishType.SUCCESS it also calls self.on_success(result).
# NOTE(review): indentation is missing in this view; whether the final sleep and
# browser.close() run only on SUCCESS or for every status is unconfirmed.
# NOTE(review): the cleanup calls use self.contact, not the `contact` argument.
# create the message
id = url.split("/")[-1]
result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url,
firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,
passport=contact.passport, ccid=contact.ccid)
result.id = id
result.store_type = self.store_type
definitions.firebase_store_manager.save(result)
collection_name = str(datetime.date.today())
MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result)
MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
MONGO_STORE_MANAGER.remove_contact_from_black_list(self.contact)
if status is PublishType.SUCCESS:
self.on_success(result)
time.sleep(2)
self.browser.close()
def resolve_captcha(self):
    """Create a SolveCaptcha for the current page and start it.

    ``fill_captcha_solution`` is passed to ``start`` as the callback.
    """
    solver = SolveCaptcha(self.page)
    self.captcha_solver = solver
    solver.start(self.fill_captcha_solution)
def fill_captcha_solution(self, solution):
# Callback handed to SolveCaptcha.start(): writes `solution` into the
# "g-recaptcha-response" element, clicks the validation button, sleeps 20 seconds and
# may click again depending on is_finished / is_captcha_in_error.
# Exceptions are logged; the page is reloaded with PAGE_TIMEOUT.
# NOTE(review): indentation is missing in this view; which `if` the `else` belongs
# to, and whether the reload happens only after an exception, is unconfirmed.
self.logger.info("will input solution")
try:
self.page.evaluate("""(solution)=>{
document.getElementById("g-recaptcha-response").innerHTML=solution;}""", solution)
self.logger.info("will click on valid btn")
self.clickOnValidBtn()
# wait for 20s
time.sleep(20)
if not self.is_finished:
if not self.is_captcha_in_error:
self.clickOnValidBtn()
else:
self.is_captcha_in_error = False
except Exception as error:
self.logger.error(error)
self.page.reload(timeout=PAGE_TIMEOUT)
def handle_captcha_error(self):
    """Record a captcha failure for the current contact in MongoDB, then report it."""
    failed_contact = self.contact
    MONGO_STORE_MANAGER.insert_captcha_error_contact(failed_contact)
    params.oracle_log_sender.send_captcha_error(failed_contact)
def on_success(result: ReserveResultPojo):
    """Default module-level success listener: accepts the result and does nothing."""
    return None
def launch_page():
    """Build a sample contact and run one CommandorPage session through a Bright Data proxy.

    Returns whatever ``CommandorPage.start_page`` returns.
    """
    sample_contact = ContactPojo(
        phone_number="+33758912245",
        passport_number="82546975",
        last_name="XU",
        first_name="xingzhen",
        mail="ColbyPatel653@gmail.com",
        ccid="",
        position=0,
    )
    session = CommandorPage(sample_contact, store_type=1)
    proxy = params.get_proxy(ProxyType.BRIGHT_DATA)
    return session.start_page(proxy)
def wait_for_otp(event: threading.Event, commandor: CommandorPage):
    """Read an OTP from stdin, store it on ``commandor.otp_value`` and set ``event``."""
    entered_otp = input("Press Enter otp to continue...\n")
    print("input otp is: " + entered_otp)
    commandor.otp_value = entered_otp
    event.set()
# Script entry point: run a single sample booking session.
# (Commented-out debugging code that embedded proxy credentials was removed;
# credentials must not live in source control.)
if __name__ == '__main__':
    launch_page()
import datetime
import logging
import random
import re
import sys
import threading
import time
import traceback
from typing import Union
from anticaptchaofficial.antigatetask import antigateTask
from playwright._impl._api_structures import SetCookieParam
from playwright_stealth import stealth_sync
from src import params, definitions
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ModeEnum import ModeEnum
from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType
from src.pojo.contact_pojo import ContactPojo
from src.proxy.proxy_type import ProxyType
from src.workers.SolveCaptch import SolveCaptcha
from src.workers.TlsPlaywright import TlsPlaywright
# Registration page that every session opens first.
RDV_URL = "https://rendezvousparis.hermes.com/client/register"
# Alternative targets used while debugging:
# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html"
# RDV_URL = "https://api.ipify.org"
# RDV_URL ="https://bot.sannysoft.com/"
# Pattern for result URLs (registration URL followed by an upper-case/digit id).
# Raw string: "\/" and "\." are regex escapes, not valid Python string escapes.
REGEX_RDV_URL = r"https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+"
otp_value = None
# CSS selectors used on the registration page.
OTP_FIELD_ID = "#sms_code"
MESSAGE_FIELD_CLASS = ".message"
BLANK_URL = "about:blank"
# Sentences searched for in the page HTML to classify the outcome (English / French).
CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered"
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail."
DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today."
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests"
TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes"
CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again"
CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué"
TIME_OUT = 10 * 60 * 1000  # 10 mins
OTP_TIMEOUT = 240
PAGE_TIMEOUT = 40000
def get_random_wait_time() -> float:
    """Return a random pause in seconds, one of 0.0, 0.1, ..., 1.0."""
    # The former "* 1" factor and temporary variable were no-ops.
    return random.randint(0, 10) / 10.0
class CommandorPage:
    """Drives one appointment-registration session for a single contact."""

    # Shared Playwright holder; ``tls.playwright`` is passed to start_browser().
    tls = TlsPlaywright()

    def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA,
                 mode: ModeEnum = ModeEnum.MANUAL, headless=False):
        """Store the session settings and reset all state flags.

        Args:
            contact: person to register; ``contact.phone`` is normalised in place.
            store_type: 0 = random, 1 = faubourg, 2 = George V, 3 = Sèvres.
            proxy_type: proxy family used when a new proxy is requested.
            mode: ``ModeEnum.AUTOMATIC`` triggers captcha solving after filling.
            headless: forwarded to the browser launch.
        """
        self.otp_value = None
        self.is_finished = False
        self.current_context = None
        # Declared up front so error paths that close the browser never hit a
        # missing attribute; they are assigned for real in start_browser()/_run().
        self.browser = None
        self.page = None
        self.captcha_solver = None
        self.on_success_listener = None
        self.contact = contact
        self.contact.phone = self.contact.phone.replace(".0", "")  # remove the .0 if the Excel format is not correct
        # Logger name prefix means "appointment page".
        self.logger = logging.getLogger("约会页面:" + str(self.contact.phone))
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.appointment_mode = mode
        # 0: random
        # 1: faubourg
        # 2: George
        # 3: Sèvres
        self.store_map = {
            1: "faubourg",
            2: "georgev",
            3: "sevres"
        }
        self.store_type = store_type
def on_success(self, result: ReserveResultPojo):
    """Mark the session finished and report ``result`` once.

    The report to ``params.oracle_log_sender`` is skipped when
    ``is_event_sent`` is already set.
    """
    self.logger.info("on_success called.")
    self.is_finished = True
    if self.is_event_sent:
        return
    self.logger.info("will send successful event")
    self.logger.info(result)
    params.oracle_log_sender.send_appoint_result(result)
    self.is_event_sent = True
def timeout_occurred(self):
    """Report a timeout for the current contact, then close the browser via termine()."""
    sender = params.oracle_log_sender
    sender.send_timeout_log(self.contact)
    self.logger.info("will close timeout modem")
    self.termine()
def _run(self, proxy):
    """Open the registration page (retrying until it loads) and wait for the OTP field.

    ``start_browser`` is retried until it returns page content; a fresh proxy
    is requested after every attempt. The session then waits up to TIME_OUT
    for the OTP field to become visible and is closed afterwards.

    Raises:
        Whatever ``wait_for`` raises (e.g. on timeout); the browser is still
        closed before the exception propagates.
    """
    self.logger.info("will start browser")
    self.on_success_listener = on_success
    # reset otp_value to None
    self.otp_value = None
    devices = random.choice(params.DEVICES)
    first_page = None
    while first_page is None:
        first_page = self.start_browser(proxy, self.tls.playwright, devices)
        proxy = params.get_proxy(self.proxy_type)
    # self.thread_event = e
    otp_input = self.page.locator(OTP_FIELD_ID)
    try:
        otp_input.wait_for(state='visible', timeout=TIME_OUT)
    finally:
        # wait_for raises when the timeout elapses; without ``finally`` the
        # browser would be left open in exactly that case.
        self.logger.info("timeout")
        self.termine()
def fill_fields(self):
# Fills the registration form from self.contact: name, phone country / store,
# phone number (prefixed with "0"), e-mail, passport id and the consent checkboxes.
# In ModeEnum.AUTOMATIC it then starts captcha solving.
# is_filling_fields is used as a guard so the form is not filled while a fill is running.
# NOTE(review): indentation is missing in this view; the exact scope at which
# is_filling_fields is reset to False is unconfirmed.
if not self.is_filling_fields:
self.is_filling_fields = True
# Log prefix below means "filling in information".
self.logger.info("填充信息: " + str(self.contact.phone))
self._set_name(self.contact.last_name, self.contact.first_name)
self._setPhoneCountryAndStore()
self._set_phone_number("0" + str(self.contact.phone))
self._set_email(self.contact.mail)
self._set_id_number(self.contact.passport)
self._checkCgu()
if self.appointment_mode == ModeEnum.AUTOMATIC:
self.resolve_captcha()
self.is_filling_fields = False
def start_browser(self, proxy, pwright, device) -> Union[str, None]:
    """Launch WebKit with a simulated device, open RDV_URL and return the page HTML.

    Args:
        proxy: proxy settings forwarded to ``webkit.launch``.
        pwright: Playwright handle exposing ``webkit`` and ``devices``.
        device: key into ``pwright.devices``; the whole descriptor is applied
            to the new browser context.

    Returns:
        The HTML of the loaded page, or ``None`` when any step failed (the
        error is reported and the browser is closed on a best-effort basis).
    """
    try:
        self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
        simulated_mobile = pwright.devices[device]
        userAgent = simulated_mobile['user_agent']
        print("user_agent is " + userAgent)
        context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
        self.current_context = context
        self.create_and_config_page(context)
        return self.page.content()
    except Exception as error:
        params.oracle_log_sender.send_error(str(error))
        # print_exc() reads the active exception itself; passing exc_info()
        # positionally would bind the exception type to ``limit`` and raise.
        traceback.print_exc()
        self.logger.exception(error)
        self.logger.info("will close browser")
        # The launch itself may have failed, so the attribute can be missing,
        # stale or already closed; closing must never mask the original error.
        browser = getattr(self, "browser", None)
        if browser is not None:
            try:
                browser.close()
            except Exception as close_error:
                self.logger.error(close_error)
        return None
def create_and_config_page(self, context):
    """Create ``self.page`` in ``context``, attach the event handlers and open RDV_URL.

    ``stealth_sync`` is applied to the page and an init script is registered
    before the "load" and "response" handlers are attached.
    """
    page = context.new_page()
    self.page = page
    stealth_sync(page)
    # hide webdriver information
    init_script = """() => {
Object.defineProperty(navigator,'webdriver',{get: () => undefined});
Object.defineProperty(navigator, 'platform', {
get: () => {
return "iPhone";
}});
}
"""
    page.add_init_script(init_script)
    page.on("load", self._on_page_loaded)
    page.on("response", self.handle_response)
    page.goto(RDV_URL, timeout=PAGE_TIMEOUT)
def handle_response(self, response):
    """Log every network response whose URL starts with the result-URL pattern."""
    matched = re.match(REGEX_RDV_URL, response.url)
    if matched:
        self.logger.info("result url found: " + response.url)
    # Publishing a PENDING result from here is currently disabled:
    # self.publish_message_to_queue(self.contact, PublishType.PENDING, response.url)
def start_page(self, proxy):
    """Run one booking session through ``proxy`` by delegating to ``_run``.

    Returns ``None``; the previously created ``threading.Event`` was never
    used and has been dropped.
    """
    self._run(proxy)
def solve_datadome_captcha(self):
    """Submit an "Anti-bot screen bypass" task for RDV_URL and apply its result.

    The service API key is read from the ``ANTICAPTCHA_API_KEY`` environment
    variable instead of being hard-coded in source. When the variable is not
    set the method logs an error and returns without contacting the service.

    On success the returned ``datadome`` cookie is added to the page context
    and the page is reloaded; on failure the solver's error code is printed.
    """
    import os  # local import keeps this block self-contained

    print("solve_datadome_captcha")
    api_key = os.environ.get("ANTICAPTCHA_API_KEY")
    if not api_key:
        self.logger.error("ANTICAPTCHA_API_KEY is not set; captcha task not submitted")
        return
    solver = antigateTask()
    solver.set_verbose(1)
    solver.set_key(api_key)
    solver.set_website_url(RDV_URL)
    solver.set_template_name("Anti-bot screen bypass")
    solver.set_variables({
        "css_selector": ".captcha__human__container"
    })
    result = solver.solve_and_return_solution()
    if result != 0:
        cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[
            "fingerprint"], result["url"], result["domain"]
        print("cookies: ", cookies)
        print("localStorage: ", localStorage)
        print("fingerprint: ", fingerprint)
        print("url: " + url)
        print("domain: " + domain)
        # add cookies to playwright
        cookie_list = [SetCookieParam(name='datadome', value=cookies['datadome'], url=url)]
        self.page.context.add_cookies(cookie_list)
        self.config_page_with_fingerprint(fingerprint)
        self.page.reload()
    else:
        # str() guards against a non-string error code breaking the concatenation.
        print("task finished with error " + str(solver.error_code))
def _on_page_loaded(self):
# Callback for the page "load" event (registered in create_and_config_page).
# Logs the current URL; when the captcha-delivery marker is absent from the HTML
# it fills the form if the page is still RDV_URL, publishes a SUCCESS result when
# one of the French confirmation sentences is present, and calls get_errors().
# Any exception raised in the try block is logged and swallowed.
# The disabled `else` branch at the end would call solve_datadome_captcha().
# NOTE(review): indentation is missing in this view, so the exact nesting of the
# try block and of get_errors() relative to the `if` statements is unconfirmed.
# time.sleep(40000)
# Log text below means "page finished loading".
self.logger.info("页面加载完毕")
self.logger.info("url is " + self.page.url)
captcha_url = "geo.captcha-delivery.com/captcha"
if captcha_url not in self.page.content():
if self.page.url == RDV_URL:
self.fill_fields()
try:
message = self.page.content()
if CONFIRMED_MESSAGE_FR in message or MESSAGE_URL_VALIDATION_FR in message:
# publish the successful message
self.publish_message_to_queue(self.contact, PublishType.SUCCESS, self.page.url)
self.get_errors()
except Exception as error:
self.logger.error(error)
# else:
# self.solve_datadome_captcha()
def on_document_loaded(self):
    """Log that the document-loaded hook was invoked; nothing else happens."""
    log = self.logger
    log.info("on_document_loaded called")
def _setPhoneCountryAndStore(self):
    """Set the phone country to "FR" and, for a non-zero store type, the preferred store.

    Store type 0 leaves the "prefer" element untouched. Any exception
    (including an unknown store type) is logged and swallowed.
    """
    try:
        if self.store_type != 0:
            preferred_store = self.store_map[self.store_type]
            self.page.evaluate("""(store_to_choose)=>{
document.getElementById("prefer").value = store_to_choose;
//document.getElementById("phone_country").focus();
document.getElementById("phone_country").value = \"FR\" }""", preferred_store)
            return
        self.page.evaluate("""()=>{
//document.getElementById("phone_country").focus();
document.getElementById("phone_country").value = \"FR\" }""")
    except Exception as error:
        self.logger.error(error)
def _set_phone_number(self, phoneNumber):
    """Pause briefly, then write ``phoneNumber`` into the element with id "phone_number".

    Evaluation errors are logged and swallowed.
    """
    pause = get_random_wait_time()
    time.sleep(pause)
    script = """(phoneNumber)=>document.getElementById("phone_number").value =phoneNumber"""
    try:
        self.page.evaluate(script, phoneNumber)
    except Exception as error:
        self.logger.error(error)
def _set_name(self, lastName, firstName):
    """Pause briefly, then fill "surname" and "name" if the surname field is empty.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    name_payload = {'lastName': lastName, 'firstName': firstName}
    script = """(name)=> {
let surname = document.getElementById("surname");
if(surname.value.length == 0){
// surname.focus();
surname.value = name.lastName;
document.getElementById("name").focus();
document.getElementById("name").value = name.firstName
}}
"""
    try:
        self.page.evaluate(script, name_payload)
    except Exception as error:
        self.logger.error(error)
def get_errors(self):
# Publishes an ERROR result when the page URL is neither BLANK_URL nor RDV_URL,
# then looks for a "div.alert" element and forwards its inner HTML to _handle_errors().
# Exceptions raised while querying the page are logged and swallowed.
# NOTE(review): indentation is missing in this view; whether the try block sits
# inside the first `if` cannot be confirmed from here.
# send error result
if self.page.url != BLANK_URL:
# no need to push blank url to db
if self.page.url != RDV_URL:
# no need to push RDV url to db
self.publish_message_to_queue(self.contact, PublishType.ERROR, self.page.url)
try:
items = self.page.query_selector("div.alert")
if items:
erro_content = items.inner_html()
# Log prefix below means "Error:".
self.logger.info("错误:" + erro_content)
self._handle_errors(erro_content)
except Exception as ext:
self.logger.error(ext)
def _handle_errors(self, erro_content: str):
# Classifies the alert HTML (English or French wording) into one of three cases:
# duplicate request, too many requests, or captcha failure. Each case reports to
# params.oracle_log_sender and/or MONGO_STORE_MANAGER, sets is_finished and calls termine().
# NOTE(review): indentation is missing in this view; whether termine() runs only
# when is_finished was False, or unconditionally per branch, is unconfirmed.
if DOUBLE_REQUEST_ERROR_MESSAGE in erro_content or DOUBLE_REQUEST_ERROR_MESSAGE_FR in erro_content:
# this email has been already used
if not self.is_finished:
params.oracle_log_sender.send_double_data_error(self.contact)
MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
self.is_finished = True
self.termine()
elif TOO_MANY_REQUEST_ERROR_MESSAGE in erro_content or TOO_MANY_REQUEST_ERROR_MESSAGE_FR in erro_content:
# this email is in black list
if not self.is_finished:
params.oracle_log_sender.send_too_many_error(self.contact)
MONGO_STORE_MANAGER.insert_blacklist_contact(self.contact)
self.is_finished = True
self.termine()
elif CAPTCHA_ERROR_MESSAGE in erro_content or CAPTCHA_ERROR_MESSAGE_FR in erro_content:
# the site reported a captcha verification failure
self.is_captcha_in_error = True
if not self.is_finished:
# save the error to database with contact info
self.handle_captcha_error()
self.is_finished = True
# no need to retry captcha, if retry ,will generate DOUBLE_REQUEST_ERROR_MESSAGE
self.termine()
# self.resolve_captcha()
def _set_email(self, email):
    """Pause briefly, then fill the "email" element if it is currently empty.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    script = """(email)=>{
let emailElement = document.getElementById("email")
if(emailElement.value.length == 0){
emailElement.focus();
document.getElementById("email").value = email;}}"""
    try:
        self.page.evaluate(script, email)
    except Exception as error:
        self.logger.error(error)
def _set_id_number(self, id):
    """Pause briefly, then focus the "passport_id" element and set its value to ``id``.

    Evaluation errors are logged and swallowed.
    """
    time.sleep(get_random_wait_time())
    script = """ (id) =>{
document.getElementById("passport_id").focus();
document.getElementById("passport_id").value = id}"""
    try:
        self.page.evaluate(script, id)
    except Exception as error:
        self.logger.error(error)
def _checkCgu(self):
    """Tick the "cgu" and "processing" checkboxes via an in-page script.

    Evaluation errors are logged and swallowed.
    """
    script = """
document.getElementById("cgu").focus();
document.getElementById("cgu").checked = true;
document.getElementById("processing").focus();
document.getElementById("processing").checked = true"""
    try:
        self.page.evaluate(script)
    except Exception as error:
        self.logger.error(error)
def clickOnValidBtn(self):
    """Focus, then click, the first element with class "btn", pausing before each step.

    Evaluation errors are logged and swallowed.
    """
    focus_script = """document.getElementsByClassName("btn")[0].focus();"""
    click_script = """
document.getElementsByClassName("btn")[0].click();"""
    time.sleep(get_random_wait_time())
    try:
        self.page.evaluate(focus_script)
        time.sleep(get_random_wait_time())
        self.page.evaluate(click_script)
    except Exception as error:
        self.logger.error(error)
def fill_otp(self, otp: str):
    """Focus the OTP field, pause briefly, then fill it with ``otp``."""
    page = self.page
    page.focus(OTP_FIELD_ID)
    time.sleep(get_random_wait_time())
    page.fill(OTP_FIELD_ID, otp)
def termine(self):
    """Log the shutdown, wait one second, then close the browser."""
    log = self.logger
    log.info("will close browser")
    time.sleep(1)
    self.browser.close()
def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str):
# Builds a ReserveResultPojo for `contact` (id = last path segment of `url`), saves it
# through definitions.firebase_store_manager and MONGO_STORE_MANAGER (collection named
# after today's date), and removes self.contact from the captcha-error and black lists.
# On PublishType.SUCCESS it also calls self.on_success(result).
# NOTE(review): indentation is missing in this view; whether the final sleep and
# browser.close() run only on SUCCESS or for every status is unconfirmed.
# NOTE(review): the cleanup calls use self.contact, not the `contact` argument.
# create the message
id = url.split("/")[-1]
result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url,
firstName=contact.first_name, lastName=contact.last_name, email=contact.mail,
passport=contact.passport, ccid=contact.ccid)
result.id = id
result.store_type = self.store_type
definitions.firebase_store_manager.save(result)
collection_name = str(datetime.date.today())
MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result)
MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact)
MONGO_STORE_MANAGER.remove_contact_from_black_list(self.contact)
if status is PublishType.SUCCESS:
self.on_success(result)
time.sleep(2)
self.browser.close()
def resolve_captcha(self):
    """Create a SolveCaptcha for the current page and start it.

    ``fill_captcha_solution`` is passed to ``start`` as the callback.
    """
    solver = SolveCaptcha(self.page)
    self.captcha_solver = solver
    solver.start(self.fill_captcha_solution)
def fill_captcha_solution(self, solution):
# Callback handed to SolveCaptcha.start(): writes `solution` into the
# "g-recaptcha-response" element, clicks the validation button, sleeps 20 seconds and
# may click again depending on is_finished / is_captcha_in_error.
# Exceptions are logged; the page is reloaded with PAGE_TIMEOUT.
# NOTE(review): indentation is missing in this view; which `if` the `else` belongs
# to, and whether the reload happens only after an exception, is unconfirmed.
self.logger.info("will input solution")
try:
self.page.evaluate("""(solution)=>{
document.getElementById("g-recaptcha-response").innerHTML=solution;}""", solution)
self.logger.info("will click on valid btn")
self.clickOnValidBtn()
# wait for 20s
time.sleep(20)
if not self.is_finished:
if not self.is_captcha_in_error:
self.clickOnValidBtn()
else:
self.is_captcha_in_error = False
except Exception as error:
self.logger.error(error)
self.page.reload(timeout=PAGE_TIMEOUT)
def handle_captcha_error(self):
    """Record a captcha failure for the current contact in MongoDB, then report it."""
    failed_contact = self.contact
    MONGO_STORE_MANAGER.insert_captcha_error_contact(failed_contact)
    params.oracle_log_sender.send_captcha_error(failed_contact)
def config_page_with_fingerprint(self, fingerprint):
    """Register the navigator-override init script on the current page.

    ``fingerprint`` is accepted but not used by the current implementation.
    """
    init_script = """() => {
Object.defineProperty(navigator,'webdriver',{get: () => undefined});
Object.defineProperty(navigator, 'platform', {
get: () => {
return "iPhone";
}});
}
"""
    self.page.add_init_script(init_script)
def on_success(result: ReserveResultPojo):
    """Default module-level success listener: accepts the result and does nothing."""
    return None
def launch_page():
    """Build a sample contact and run one CommandorPage session through a Bright Data proxy.

    Returns whatever ``CommandorPage.start_page`` returns.
    """
    sample_contact = ContactPojo(
        phone_number="+33758912245",
        passport_number="82546975",
        last_name="XU",
        first_name="xingzhen",
        mail="ColbyPatel653@gmail.com",
        ccid="",
        position=0,
    )
    session = CommandorPage(sample_contact, store_type=1)
    proxy = params.get_proxy(ProxyType.BRIGHT_DATA)
    return session.start_page(proxy)
def wait_for_otp(event: threading.Event, commandor: CommandorPage):
    """Read an OTP from stdin, store it on ``commandor.otp_value`` and set ``event``."""
    entered_otp = input("Press Enter otp to continue...\n")
    print("input otp is: " + entered_otp)
    commandor.otp_value = entered_otp
    event.set()
# Script entry point: run a single sample booking session.
# (Commented-out debugging code that embedded proxy credentials was removed;
# credentials must not live in source control.)
if __name__ == '__main__':
    launch_page()
+135 -135
View File
@@ -1,135 +1,135 @@
import logging
import random
import traceback
from typing import Union
import sys
import time
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ReserveResultPojo import PublishType
from src.proxy.proxy_type import ProxyType
from src.workers.TlsPlaywright import TlsPlaywright
# Selector that LinkValidator._run waits on after the link has been opened.
OTP_FIELD_ID = "#sms_code"
# Passed as `timeout=` to the locator wait in _run.
TIME_OUT = 10 * 60 * 1000 # 10 mins
# Passed as `timeout=` to browser launch and page.goto.
PAGE_TIMEOUT = 40000
# French sentences searched for in the page HTML to classify the validation outcome.
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci"
# NOTE(review): "aujourdhui" has no apostrophe; presumably matches the site's text — confirm.
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
class LinkValidator:
    """Open a validation link in a WebKit browser and record the outcome.

    The page HTML is inspected on every "load" event; a recognised French
    sentence causes the link state to be stored through MONGO_STORE_MANAGER.
    """

    # Shared Playwright holder; ``tls.playwright`` is passed to start_browser().
    tls = TlsPlaywright()

    def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False):
        """Store the link and settings and reset all state flags."""
        self.is_finished = False
        self.link = link
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.logger = logging.getLogger("LinkValidator")
        # Declared up front so error paths that close the browser never hit a
        # missing attribute; assigned for real in start_browser().
        self.browser = None
        self.page = None

    def on_success(self):
        """Mark the validation finished and report it once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            params.oracle_log_sender.send_url_validation_result()
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this link, then close the browser."""
        params.oracle_log_sender.send_timeout_log(self.link)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Open the link (retrying until it loads), then wait up to TIME_OUT.

        ``start_browser`` is retried until it returns page content; a fresh
        proxy is requested after every attempt. Whatever ``wait_for`` raises
        (e.g. on timeout) still propagates, but the browser is closed first.
        """
        self.logger.info("will start browser")
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            proxy = params.get_proxy(self.proxy_type)
        otp_input = self.page.locator(OTP_FIELD_ID)
        try:
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
        finally:
            # wait_for raises when the timeout elapses; without ``finally``
            # the browser would be left open in exactly that case.
            self.logger.info("timeout")
            self.termine()

    def _close_browser_quietly(self):
        """Close the browser if one exists, logging instead of raising on failure."""
        browser = getattr(self, "browser", None)
        if browser is None:
            return
        try:
            browser.close()
        except Exception as close_error:
            self.logger.error(close_error)

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit with a simulated device, open the link and return its HTML.

        Returns:
            The page HTML, or ``None`` when the captcha-delivery marker is
            present in it or when any step failed (browser closed in both cases).
        """
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            # Log prefix below means "simulated device".
            self.logger.info("模拟设备: " + device)
            simulated_mobile = pwright.devices[device]
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.page = context.new_page()
            # hide webdriver information
            self.page.add_init_script("""() => {
Object.defineProperty(navigator,'webdriver',{get: () => undefined});
Object.defineProperty(navigator, 'platform', {
get: () => {
return "iPhone";
}});
}
""")
            self.page.on("load", self._on_page_loaded)
            self.page.goto(self.link, timeout=PAGE_TIMEOUT)
            captcha_url = "geo.captcha-delivery.com/captcha"
            # Fetch the HTML once so the check and the return value agree.
            content = self.page.content()
            if captcha_url in content:
                self.logger.info("will close browser")
                self.browser.close()
                return None
            return content
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            # print_exc() reads the active exception itself; passing exc_info()
            # positionally would bind the exception type to ``limit`` and raise.
            traceback.print_exc()
            self.logger.exception(error)
            self.logger.info("will close browser")
            # Closing must never mask the original error.
            self._close_browser_quietly()
            return None

    def start_page(self, proxy, headless=False):
        """Set the headless flag and run one validation session through ``proxy``."""
        self.headless = headless
        self._run(proxy)

    def _on_page_loaded(self):
        """Classify the loaded page by its HTML and publish the matching state.

        Exceptions are logged and swallowed.
        """
        # Log text below means "page finished loading".
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message or SORRY_SENTENCE_FR in message:
                # both sentences are stored as a successful validation
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message:
                # the same data was already validated today
                self.publish_message_to_queue(PublishType.DUPLICATED)
        except Exception as error:
            self.logger.error(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook was invoked."""
        self.logger.info("on_document_loaded called")

    def _handle_errors(self, erro_content: str):
        """Placeholder; intentionally does nothing."""
        pass

    def termine(self):
        """Log the shutdown, wait one second, then close the browser."""
        self.logger.info("will close browser")
        time.sleep(1)
        self.browser.close()

    def publish_message_to_queue(self, status: PublishType):
        """Store the link state for the current URL, report it and close the browser.

        SUCCESS uses the store's default state; any other status is stored
        under ``status.name``.
        """
        if status is PublishType.SUCCESS:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url)
        else:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name)
        self.on_success()
        time.sleep(2)
        self.browser.close()
import logging
import random
import traceback
from typing import Union
import sys
import time
from src import params
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.pojo.ReserveResultPojo import PublishType
from src.proxy.proxy_type import ProxyType
from src.workers.TlsPlaywright import TlsPlaywright
# Selector that LinkValidator._run waits on after the link has been opened.
OTP_FIELD_ID = "#sms_code"
# Passed as `timeout=` to the locator wait in _run.
TIME_OUT = 10 * 60 * 1000 # 10 mins
# Passed as `timeout=` to browser launch and page.goto.
PAGE_TIMEOUT = 40000
# French sentences searched for in the page HTML to classify the validation outcome.
CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions."
SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci"
# NOTE(review): "aujourdhui" has no apostrophe; presumably matches the site's text — confirm.
DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourdhui."
class LinkValidator:
    """Open a validation link in a WebKit browser and record the outcome.

    The page HTML is inspected on every "load" event; a recognised French
    sentence causes the link state to be stored through MONGO_STORE_MANAGER.
    """

    # Shared Playwright holder; ``tls.playwright`` is passed to start_browser().
    tls = TlsPlaywright()

    def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False):
        """Store the link and settings and reset all state flags."""
        self.is_finished = False
        self.link = link
        self.proxy_type = proxy_type
        self.is_event_sent = False
        self.is_captcha_in_error = False
        self.is_filling_fields = False
        self.headless = headless
        self.logger = logging.getLogger("LinkValidator")
        # Declared up front so error paths that close the browser never hit a
        # missing attribute; assigned for real in start_browser().
        self.browser = None
        self.page = None

    def on_success(self):
        """Mark the validation finished and report it once."""
        self.logger.info("on_success called.")
        self.is_finished = True
        if not self.is_event_sent:
            self.logger.info("will send successful event")
            params.oracle_log_sender.send_url_validation_result()
            self.is_event_sent = True

    def timeout_occurred(self):
        """Report a timeout for this link, then close the browser."""
        params.oracle_log_sender.send_timeout_log(self.link)
        self.logger.info("will close timeout modem")
        self.termine()

    def _run(self, proxy):
        """Open the link (retrying until it loads), then wait up to TIME_OUT.

        ``start_browser`` is retried until it returns page content; a fresh
        proxy is requested after every attempt. Whatever ``wait_for`` raises
        (e.g. on timeout) still propagates, but the browser is closed first.
        """
        self.logger.info("will start browser")
        devices = random.choice(params.DEVICES)
        first_page = None
        while first_page is None:
            first_page = self.start_browser(proxy, self.tls.playwright, devices)
            proxy = params.get_proxy(self.proxy_type)
        otp_input = self.page.locator(OTP_FIELD_ID)
        try:
            otp_input.wait_for(state='visible', timeout=TIME_OUT)
        finally:
            # wait_for raises when the timeout elapses; without ``finally``
            # the browser would be left open in exactly that case.
            self.logger.info("timeout")
            self.termine()

    def _close_browser_quietly(self):
        """Close the browser if one exists, logging instead of raising on failure."""
        browser = getattr(self, "browser", None)
        if browser is None:
            return
        try:
            browser.close()
        except Exception as close_error:
            self.logger.error(close_error)

    def start_browser(self, proxy, pwright, device) -> Union[str, None]:
        """Launch WebKit with a simulated device, open the link and return its HTML.

        Returns:
            The page HTML, or ``None`` when the captcha-delivery marker is
            present in it or when any step failed (browser closed in both cases).
        """
        try:
            self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy)
            # Log prefix below means "simulated device".
            self.logger.info("模拟设备: " + device)
            simulated_mobile = pwright.devices[device]
            context = self.browser.new_context(**simulated_mobile, locale='fr-FR')
            self.page = context.new_page()
            # hide webdriver information
            self.page.add_init_script("""() => {
Object.defineProperty(navigator,'webdriver',{get: () => undefined});
Object.defineProperty(navigator, 'platform', {
get: () => {
return "iPhone";
}});
}
""")
            self.page.on("load", self._on_page_loaded)
            self.page.goto(self.link, timeout=PAGE_TIMEOUT)
            captcha_url = "geo.captcha-delivery.com/captcha"
            # Fetch the HTML once so the check and the return value agree.
            content = self.page.content()
            if captcha_url in content:
                self.logger.info("will close browser")
                self.browser.close()
                return None
            return content
        except Exception as error:
            params.oracle_log_sender.send_error(str(error))
            # print_exc() reads the active exception itself; passing exc_info()
            # positionally would bind the exception type to ``limit`` and raise.
            traceback.print_exc()
            self.logger.exception(error)
            self.logger.info("will close browser")
            # Closing must never mask the original error.
            self._close_browser_quietly()
            return None

    def start_page(self, proxy, headless=False):
        """Set the headless flag and run one validation session through ``proxy``."""
        self.headless = headless
        self._run(proxy)

    def _on_page_loaded(self):
        """Classify the loaded page by its HTML and publish the matching state.

        Exceptions are logged and swallowed.
        """
        # Log text below means "page finished loading".
        self.logger.info("页面加载完毕")
        self.logger.info("url is " + self.page.url)
        try:
            message = self.page.content()
            if CONFIRMED_MESSAGE_FR in message or SORRY_SENTENCE_FR in message:
                # both sentences are stored as a successful validation
                self.publish_message_to_queue(PublishType.SUCCESS)
            elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message:
                # the same data was already validated today
                self.publish_message_to_queue(PublishType.DUPLICATED)
        except Exception as error:
            self.logger.error(error)

    def on_document_loaded(self):
        """Log that the document-loaded hook was invoked."""
        self.logger.info("on_document_loaded called")

    def _handle_errors(self, erro_content: str):
        """Placeholder; intentionally does nothing."""
        pass

    def termine(self):
        """Log the shutdown, wait one second, then close the browser."""
        self.logger.info("will close browser")
        time.sleep(1)
        self.browser.close()

    def publish_message_to_queue(self, status: PublishType):
        """Store the link state for the current URL, report it and close the browser.

        SUCCESS uses the store's default state; any other status is stored
        under ``status.name``.
        """
        if status is PublishType.SUCCESS:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url)
        else:
            MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name)
        self.on_success()
        time.sleep(2)
        self.browser.close()