diff --git a/requirements.txt b/requirements.txt index e925aa4..6b508cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,18 @@ -dataclasses_json==0.5.7 -firebase_admin==5.2.0 -pandas~=1.3.5 -playwright==1.23.0 -dataclasses~=0.6 -pymongo==4.1.1 -oci~=2.54.1 -XlsxWriter~=3.0.3 -boto3~=1.21.13 -openpyxl==3.0.9 -google-cloud-firestore==2.4.0 -PySimpleGUI==4.60.1 -SQLAlchemy~=1.4.37 -requests~=2.27.1 -sqlalchemy_utils~=0.38.2 +dataclasses_json==0.5.7 +firebase_admin==5.2.0 +pandas~=1.3.5 +playwright==1.25.0 +dataclasses~=0.6 +pymongo==4.1.1 +anticaptchaofficial==1.0.43 +oci~=2.54.1 +XlsxWriter~=3.0.3 +playwright-stealth==1.0.5 +boto3~=1.21.13 +openpyxl==3.0.9 +google-cloud-firestore==2.4.0 +PySimpleGUI==4.60.1 +SQLAlchemy~=1.4.37 +requests~=2.27.1 +sqlalchemy_utils~=0.38.2 Mako~=1.2.0 \ No newline at end of file diff --git a/src/mail/mail_reader.py b/src/mail/mail_reader.py index 75eeffa..3f40c22 100644 --- a/src/mail/mail_reader.py +++ b/src/mail/mail_reader.py @@ -1,173 +1,173 @@ -import datetime -import email -import imaplib -import re -from concurrent.futures import ThreadPoolExecutor -from email.header import decode_header -from email.message import Message - -from builtins import list - -from src import params -from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.pojo.mail.mail_pojo import MailPojo, MailAddress -from src.proxy.proxy_type import ProxyType -from src.workers.link_validator import LinkValidator - -AOL_IMAP_SERVER = "imap.aol.com" -IMAP_SERVER_163 = "imap.163.com" -VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous' -VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" -HERMES_EMAIL = "no-reply@hermes.com" - -DOMAIN_163 = "163.com" -date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014 - - -class MailReader(): - def __init__(self, login, password): - self.login = login - self.password = password - - @staticmethod - def show_folders(imap): - for i in imap.list()[1]: - l = i.decode().split(' "/" ') - print(l[0] + " = " + l[1]) - - def read_emails(self, mails_messages: list) -> list: - # create an IMAP4 class with SSL - if DOMAIN_163 in self.login: - imap = imaplib.IMAP4_SSL(IMAP_SERVER_163) - else: - imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER) - # authenticate - imap.login(self.login, self.password) - mail_list = [] - print("read mails from {}".format(self.login)) - # self.show_folders(imap) - # total number of emails - # get mails from inbox - # (\Archive \HasNoChildren) = "Archive" - # (\Junk \HasNoChildren) = "Bulk" - # (\Drafts \HasNoChildren) = "Draft" - # (\HasNoChildren) = "Inbox" - # (\Sent \HasNoChildren) = "Sent" - # (\Trash \HasNoChildren) = "Trash" - mail_list.extend(self._get_messages_from_folder(imap)) - # mail_list.extend(self._get_messages_from_folder(imap, folder="Bulk")) - # close the connection and logout - imap.close() - imap.logout() - mails_messages.extend(mail_list) - return mail_list - - def _get_messages_from_folder(self, imap, folder="INBOX") -> list: - imap.select(folder) - mail_messages = [] - typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT, - datetime.datetime.today().strftime( - date_format))) - for i in data[0].split(): - # fetch the email message by ID - res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") - body = '' - for response in msg: - if isinstance(response, tuple): - # parse a bytes email into a message object - msg = email.message_from_bytes(response[1]) - # decode the email subject - subject, subject_encoded = decode_header(msg["Subject"])[0] - received_date = msg["Date"] - if isinstance(subject, bytes): - # if it's a bytes, decode to str - subject = subject.decode(subject_encoded) - # decode email sender - from_address, subject_encoded = decode_header(msg.get("From"))[0] - if isinstance(from_address, bytes): - from_address = from_address.decode(subject_encoded) - print("From:", from_address) - print("Subject:", subject) - # if the email message is multipart - if msg.is_multipart(): - # iterate over email parts - for part in msg.walk(): - try: - # get the email body - payloads = part.get_payload() - if isinstance(payloads, list): - for payload in payloads: - if isinstance(payload, Message): - body = body + payload.get_payload(decode=True).decode("iso-8859-1") - # print(body) - except Exception as Error: - print(Error) - else: - body = msg.get_payload(decode=True).decode() - print(body) - if VALIDATION_URL_SUBJECT in subject: - mail = MailPojo(subject=subject, body=body, from_address=from_address) - mail_messages.append(mail) - return mail_messages - - -def clean(text): - # clean text for creating a folder - return "".join(c if c.isalnum() else "_" for c in text) - - -def need_to_valid_url(url: str, successful_items) -> bool: - print("url is :" + url) - parts = url.split('/') - id = parts[5] - if len(id) == 6: - for item in successful_items: - # if item.url_validated is not None: - # print("id:{}, status:{} ".format(id, str(item.url_validated))) - if item.id == id: - if item.url_validated is not None: - return not item.url_validated - else: - # if url_validated is None - return True - # return True by default - return False - else: - print("id not valid:{}".format(id)) - return False - - -def read_mails(): - # get email address - mail_list = MONGO_STORE_MANAGER.get_destination_emails() - # # mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq") - # # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb") - # # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm") - # # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce") - # # mail_address4 = MailAddress(mail="hongjiang176@aol.com", password="ftzpscgzvwneelmn") - # mail_address4 = MailAddress(mail="ribka_puchkova@aol.com", password="aqvsozoyifbixtdc") - # mail_list = [mail_address3, mail_address2, mail_address1, mail_address4] - # mail_list = [mail_address4] - successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - mails_messages = [] - with ThreadPoolExecutor(max_workers=20) as executor: - for mail in mail_list: - mail_reader = MailReader(mail.mail, mail.password) - executor.submit(mail_reader.read_emails, mails_messages) - - with ThreadPoolExecutor(max_workers=20) as executor: - for mail in mails_messages: - match = re.search(VALIDATION_URL_REGEX, mail.body) - if match: - url = match.group(0) - if need_to_valid_url(url, successful_items): - url_validator = LinkValidator(url) - print("need to validate url: " + url) - executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True) - else: - print("do not need to click url --> {}".format(mail.mail_address)) - - -# check whether the url has already been clicked -if __name__ == '__main__': - read_mails() +import datetime +import email +import imaplib +import re +from concurrent.futures import ThreadPoolExecutor +from email.header import decode_header +from email.message import Message + +from builtins import list + +from src import params +from src.db.mongo_manager import MONGO_STORE_MANAGER +from src.pojo.mail.mail_pojo import MailPojo, MailAddress +from src.proxy.proxy_type import ProxyType +from src.workers.link_validator import LinkValidator + +AOL_IMAP_SERVER = "imap.aol.com" +IMAP_SERVER_163 = "imap.163.com" +VALIDATION_URL_SUBJECT = 'Validation de votre demande de rendez-vous' +VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" +HERMES_EMAIL = "no-reply@hermes.com" + +DOMAIN_163 = "163.com" +date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014 + + +class MailReader(): + def __init__(self, login, password): + self.login = login + self.password = password + + @staticmethod + def show_folders(imap): + for i in imap.list()[1]: + l = i.decode().split(' "/" ') + print(l[0] + " = " + l[1]) + + def read_emails(self, mails_messages: list) -> list: + # create an IMAP4 class with SSL + if DOMAIN_163 in self.login: + imap = imaplib.IMAP4_SSL(IMAP_SERVER_163) + else: + imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER) + # authenticate + imap.login(self.login, self.password) + mail_list = [] + print("read mails from {}".format(self.login)) + # self.show_folders(imap) + # total number of emails + # get mails from inbox + # (\Archive \HasNoChildren) = "Archive" + # (\Junk \HasNoChildren) = "Bulk" + # (\Drafts \HasNoChildren) = "Draft" + # (\HasNoChildren) = "Inbox" + # (\Sent \HasNoChildren) = "Sent" + # (\Trash \HasNoChildren) = "Trash" + mail_list.extend(self._get_messages_from_folder(imap)) + # mail_list.extend(self._get_messages_from_folder(imap, folder="Bulk")) + # close the connection and logout + imap.close() + imap.logout() + mails_messages.extend(mail_list) + return mail_list + + def _get_messages_from_folder(self, imap, folder="INBOX") -> list: + imap.select(folder) + mail_messages = [] + typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(VALIDATION_URL_SUBJECT, + datetime.datetime.today().strftime( + date_format))) + for i in data[0].split(): + # fetch the email message by ID + res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") + body = '' + for response in msg: + if isinstance(response, tuple): + # parse a bytes email into a message object + msg = email.message_from_bytes(response[1]) + # decode the email subject + subject, subject_encoded = decode_header(msg["Subject"])[0] + received_date = msg["Date"] + if isinstance(subject, bytes): + # if it's a bytes, decode to str + subject = subject.decode(subject_encoded) + # decode email sender + from_address, subject_encoded = decode_header(msg.get("From"))[0] + if isinstance(from_address, bytes): + from_address = from_address.decode(subject_encoded) + print("From:", from_address) + print("Subject:", subject) + # if the email message is multipart + if msg.is_multipart(): + # iterate over email parts + for part in msg.walk(): + try: + # get the email body + payloads = part.get_payload() + if isinstance(payloads, list): + for payload in payloads: + if isinstance(payload, Message): + body = body + payload.get_payload(decode=True).decode("iso-8859-1") + # print(body) + except Exception as Error: + print(Error) + else: + body = msg.get_payload(decode=True).decode() + print(body) + if VALIDATION_URL_SUBJECT in subject: + mail = MailPojo(subject=subject, body=body, from_address=from_address) + mail_messages.append(mail) + return mail_messages + + +def clean(text): + # clean text for creating a folder + return "".join(c if c.isalnum() else "_" for c in text) + + +def need_to_valid_url(url: str, successful_items) -> bool: + print("url is :" + url) + parts = url.split('/') + id = parts[5] + if len(id) == 6: + for item in successful_items: + # if item.url_validated is not None: + # print("id:{}, status:{} ".format(id, str(item.url_validated))) + if item.id == id: + if item.url_validated is not None: + return not item.url_validated + else: + # if url_validated is None + return True + # return True by default + return False + else: + print("id not valid:{}".format(id)) + return False + + +def read_mails(): + # get email address + mail_list = MONGO_STORE_MANAGER.get_destination_emails() + # # mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq") + # # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb") + # # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm") + # # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce") + # # mail_address4 = MailAddress(mail="hongjiang176@aol.com", password="ftzpscgzvwneelmn") + # mail_address4 = MailAddress(mail="ribka_puchkova@aol.com", password="aqvsozoyifbixtdc") + # mail_list = [mail_address3, mail_address2, mail_address1, mail_address4] + # mail_list = [mail_address4] + successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + mails_messages = [] + with ThreadPoolExecutor(max_workers=20) as executor: + for mail in mail_list: + mail_reader = MailReader(mail.mail, mail.password) + executor.submit(mail_reader.read_emails, mails_messages) + + with ThreadPoolExecutor(max_workers=20) as executor: + for mail in mails_messages: + match = re.search(VALIDATION_URL_REGEX, mail.body) + if match: + url = match.group(0) + if need_to_valid_url(url, successful_items): + url_validator = LinkValidator(url) + print("need to validate url: " + url) + executor.submit(url_validator.start_page, params.get_proxy(ProxyType.BRIGHT_DATA), True) + else: + print("do not need to click url --> {}".format(mail.mail_address)) + + +# check whether the url has already been clicked +if __name__ == '__main__': + read_mails() diff --git a/src/utils/excel_reader.py b/src/utils/excel_reader.py index debc852..d19d9fb 100644 --- a/src/utils/excel_reader.py +++ b/src/utils/excel_reader.py @@ -1,225 +1,225 @@ -import json -import random -import string - -import pandas as pandas -import xlsxwriter - -from src.config import CONTACT_LIST_FILE -from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.pojo.contact_pojo import ContactPojo -from src.pojo.mail.mail_pojo import MailAddress -from src.utils.generate_random_passport_id import get_random_passport_id_number - -phone_number_prefix = ['6'] - - -class ExcelHelper: - - def __init__(self): - self._df = pandas.Series() - - def write_to_exel(self, file_name, data_list: list): - new_df = pandas.Series(data_list) - self._df = pandas.concat([self._df, new_df]) - self._df.to_excel(file_name) - - def read_user_agens(self) -> list: - user_agent_in_json = pandas.read_excel( - "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json( - orient='records') - user_agent_dict_list = json.loads(user_agent_in_json) - user_agent_list = [] - for user_agent_dict in user_agent_dict_list: - user_agent_str = user_agent_dict['user_agent'] - if 'Mozilla' in user_agent_str: - if 'Android 5.1' in user_agent_str: - if 'Mac OS' not in user_agent_str: - user_agent_list.append(user_agent_dict['user_agent']) - print(user_agent_list) - - def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list: - contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(contact_list_in_json) - contact_list = [] - for contact_dict in contact_dict_list: - if contact_dict['name']: - raw_name = contact_dict['name'].strip() - name = raw_name.split(' ') - last_name = name[0] - if len(name) == 2: - first_name = name[-1] - else: - first_name = ''.join(name[1:len(name)]) - - contact = ContactPojo(phone_number=contact_dict['phone'], - last_name=last_name, - first_name=first_name, - passport_number=contact_dict['passport'], - mail=contact_dict['email']) - contact_list.append(contact) - return contact_list - - def read_mails_and_pwd(self, - file_name='/Users/lpan/Desktop/163.xlsx'): - contact_list = [] - mail_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(mail_list_in_json) - for contact_dict in contact_dict_list: - if contact_dict['mail']: - mail = contact_dict['mail'].strip() - pwd = contact_dict['password'] - contact = MailAddress(mail, pwd) - contact_list.append(contact) - return contact_list - - def read_names(self, file_name=CONTACT_LIST_FILE) -> list: - contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(contact_list_in_json) - contact_list = [] - count = 2 - for contact_dict in contact_dict_list: - if contact_dict['name']: - raw_name = contact_dict['name'].strip() - name = raw_name.split(' ') - if len(name) == 1: - name = raw_name.split('\xa0') - if len(name) == 1: - print("error in " + str(name)) - last_name = name[0] - if len(name) == 2: - first_name = name[-1] - else: - first_name = ''.join(name[1:len(name)]) - - contact = ContactPojo(phone_number="", - last_name=last_name, - first_name=first_name, - passport_number="", - mail="") - - if len(first_name) == 0: - print("first_name is empty: position:" + str(count)) - print(name) - if len(last_name) == 0: - print("last_name is empty: position:" + str(count)) - count = count + 1 - contact_list.append(contact) - - return contact_list - - def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list: - email_info_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(email_info_in_json) - contact_list = [] - count = 0 - for contact_dict in contact_dict_list: - if contact_dict['email']: - email = contact_dict['email'].strip() - password = contact_dict['code'] - email_destinaire = MailAddress(email, password) - count = count + 1 - contact_list.append(email_destinaire) - - return contact_list - - -def get_random_phone_numbers(): - length = 8 # number of characters in the string. - ran = ''.join(random.choices(string.digits, k=length)) - id_number = random.choice(phone_number_prefix) + str(ran) - return id_number - - -def generate_email_from_name(first_name: str, last_name: str) -> str: - length = 2 # number of characters in the string. - ran = ''.join(random.choices(string.digits, k=length)) - separator = ['.', '_', ''] - domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com', - 'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com'] - email = "{}{}{}{}@{}".format(last_name.lower(), random.choice(separator), - first_name.replace("-", "").replace("'", "").lower(), ran, - random.choice(domains)) - print(email) - return email - - -def get_random_id_number() -> str: - # write_the_valid_profiles_to_excel() - S = 8 # number of characters in the string. - # call random.choices() string module to find the string in Uppercase + numeric data. - ran = ''.join(random.choices(string.digits, k=S)) - print("The randomly generated string is : 94" + str(ran)) # print the random data - return ran - - -def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True): - row = 0 - col = 0 - # Create a workbook and add a worksheet. - workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) - header_data = ['name', 'phone', 'passport', 'email'] - worksheet = workbook.add_worksheet() - header_format = workbook.add_format({'bold': True}) - - for col_num, data in enumerate(header_data): - worksheet.write(row, col_num, data, header_format) - row = row + 1 - for info in valid_contacts: - info.phone = get_random_phone_numbers() - info.passport = get_random_passport_id_number() - info.mail = generate_email_from_name(info.first_name, info.last_name) - # Iterate over the data and write it out row by row. - worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) - worksheet.write(row, col + 1, info.phone) - worksheet.write(row, col + 2, info.passport) - worksheet.write(row, col + 3, info.mail) - row += 1 - workbook.close() - - -def write_destinaire_email(valid_contacts: list, generate_passport=True): - row = 0 - col = 0 - # Create a workbook and add a worksheet. - workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) - header_data = ['name', 'phone', 'passport', 'email'] - worksheet = workbook.add_worksheet() - header_format = workbook.add_format({'bold': True}) - - for col_num, data in enumerate(header_data): - worksheet.write(row, col_num, data, header_format) - row = row + 1 - for info in valid_contacts: - info.phone = get_random_phone_numbers() - info.passport = get_random_passport_id_number() - info.mail = generate_email_from_name(info.first_name, info.last_name) - # Iterate over the data and write it out row by row. - worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) - worksheet.write(row, col + 1, info.phone) - worksheet.write(row, col + 2, info.passport) - worksheet.write(row, col + 3, info.mail) - row += 1 - workbook.close() - - -def save_mails_to_db(): - excel_reader = ExcelHelper() - emails = excel_reader.read_email_pojo("/Users/lpan/Downloads/aol_mails_21.xlsx") - print(emails) - for mail in emails: - MONGO_STORE_MANAGER.save_destinary_emails(mail) - - -if __name__ == '__main__': - excel_reader = ExcelHelper() - excel_reader.read_user_agens() - # contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx") - # print(contacts) - # write_new_contacts_to_excel(valid_contacts=contacts) - - # save_mails_to_db() - # for mail in excel_reader.read_mails_and_pwd(): - # MONGO_STORE_MANAGER.insert_email(mail) - # for i in range(1, 64): - # print(get_random_phone_numbers()) +import json +import random +import string + +import pandas as pandas +import xlsxwriter + +from src.config import CONTACT_LIST_FILE +from src.db.mongo_manager import MONGO_STORE_MANAGER +from src.pojo.contact_pojo import ContactPojo +from src.pojo.mail.mail_pojo import MailAddress +from src.utils.generate_random_passport_id import get_random_passport_id_number + +phone_number_prefix = ['6'] + + +class ExcelHelper: + + def __init__(self): + self._df = pandas.Series() + + def write_to_exel(self, file_name, data_list: list): + new_df = pandas.Series(data_list) + self._df = pandas.concat([self._df, new_df]) + self._df.to_excel(file_name) + + def read_user_agens(self) -> list: + user_agent_in_json = pandas.read_excel( + "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json( + orient='records') + user_agent_dict_list = json.loads(user_agent_in_json) + user_agent_list = [] + for user_agent_dict in user_agent_dict_list: + user_agent_str = user_agent_dict['user_agent'] + if 'Mozilla' in user_agent_str: + if 'Android 5.1' in user_agent_str: + if 'Mac OS' not in user_agent_str: + user_agent_list.append(user_agent_dict['user_agent']) + print(user_agent_list) + + def read_contacts(self, file_name=CONTACT_LIST_FILE) -> list: + contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') + contact_dict_list = json.loads(contact_list_in_json) + contact_list = [] + for contact_dict in contact_dict_list: + if contact_dict['name']: + raw_name = contact_dict['name'].strip() + name = raw_name.split(' ') + last_name = name[0] + if len(name) == 2: + first_name = name[-1] + else: + first_name = ''.join(name[1:len(name)]) + + contact = ContactPojo(phone_number=contact_dict['phone'], + last_name=last_name, + first_name=first_name, + passport_number=contact_dict['passport'], + mail=contact_dict['email']) + contact_list.append(contact) + return contact_list + + def read_mails_and_pwd(self, + file_name='/Users/lpan/Desktop/163.xlsx'): + contact_list = [] + mail_list_in_json = pandas.read_excel(file_name).to_json(orient='records') + contact_dict_list = json.loads(mail_list_in_json) + for contact_dict in contact_dict_list: + if contact_dict['mail']: + mail = contact_dict['mail'].strip() + pwd = contact_dict['password'] + contact = MailAddress(mail, pwd) + contact_list.append(contact) + return contact_list + + def read_names(self, file_name=CONTACT_LIST_FILE) -> list: + contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') + contact_dict_list = json.loads(contact_list_in_json) + contact_list = [] + count = 2 + for contact_dict in contact_dict_list: + if contact_dict['name']: + raw_name = contact_dict['name'].strip() + name = raw_name.split(' ') + if len(name) == 1: + name = raw_name.split('\xa0') + if len(name) == 1: + print("error in " + str(name)) + last_name = name[0] + if len(name) == 2: + first_name = name[-1] + else: + first_name = ''.join(name[1:len(name)]) + + contact = ContactPojo(phone_number="", + last_name=last_name, + first_name=first_name, + passport_number="", + mail="") + + if len(first_name) == 0: + print("first_name is empty: position:" + str(count)) + print(name) + if len(last_name) == 0: + print("last_name is empty: position:" + str(count)) + count = count + 1 + contact_list.append(contact) + + return contact_list + + def read_email_pojo(self, file_name=CONTACT_LIST_FILE) -> list: + email_info_in_json = pandas.read_excel(file_name).to_json(orient='records') + contact_dict_list = json.loads(email_info_in_json) + contact_list = [] + count = 0 + for contact_dict in contact_dict_list: + if contact_dict['email']: + email = contact_dict['email'].strip() + password = contact_dict['code'] + email_destinaire = MailAddress(email, password) + count = count + 1 + contact_list.append(email_destinaire) + + return contact_list + + +def get_random_phone_numbers(): + length = 8 # number of characters in the string. + ran = ''.join(random.choices(string.digits, k=length)) + id_number = random.choice(phone_number_prefix) + str(ran) + return id_number + + +def generate_email_from_name(first_name: str, last_name: str) -> str: + length = 2 # number of characters in the string. + ran = ''.join(random.choices(string.digits, k=length)) + separator = ['.', '_', ''] + domains = ['gmail.com', 'hotmail.com', 'yahoo.com', 'aol.com', 'outlook.com', 'hotmail.fr', 'gmx.com', + 'hotmail.com', 'yahoo.com', 'aol.com', 'hotmail.com'] + email = "{}{}{}{}@{}".format(last_name.lower(), random.choice(separator), + first_name.replace("-", "").replace("'", "").lower(), ran, + random.choice(domains)) + print(email) + return email + + +def get_random_id_number() -> str: + # write_the_valid_profiles_to_excel() + S = 8 # number of characters in the string. + # call random.choices() string module to find the string in Uppercase + numeric data. + ran = ''.join(random.choices(string.digits, k=S)) + print("The randomly generated string is : 94" + str(ran)) # print the random data + return ran + + +def write_new_contacts_to_excel(valid_contacts: list, generate_passport=True): + row = 0 + col = 0 + # Create a workbook and add a worksheet. + workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) + header_data = ['name', 'phone', 'passport', 'email'] + worksheet = workbook.add_worksheet() + header_format = workbook.add_format({'bold': True}) + + for col_num, data in enumerate(header_data): + worksheet.write(row, col_num, data, header_format) + row = row + 1 + for info in valid_contacts: + info.phone = get_random_phone_numbers() + info.passport = get_random_passport_id_number() + info.mail = generate_email_from_name(info.first_name, info.last_name) + # Iterate over the data and write it out row by row. + worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) + worksheet.write(row, col + 1, info.phone) + worksheet.write(row, col + 2, info.passport) + worksheet.write(row, col + 3, info.mail) + row += 1 + workbook.close() + + +def write_destinaire_email(valid_contacts: list, generate_passport=True): + row = 0 + col = 0 + # Create a workbook and add a worksheet. + workbook = xlsxwriter.Workbook('real_contacts_{}.xlsx'.format(len(valid_contacts))) + header_data = ['name', 'phone', 'passport', 'email'] + worksheet = workbook.add_worksheet() + header_format = workbook.add_format({'bold': True}) + + for col_num, data in enumerate(header_data): + worksheet.write(row, col_num, data, header_format) + row = row + 1 + for info in valid_contacts: + info.phone = get_random_phone_numbers() + info.passport = get_random_passport_id_number() + info.mail = generate_email_from_name(info.first_name, info.last_name) + # Iterate over the data and write it out row by row. + worksheet.write(row, col, "{} {}".format(info.last_name, info.first_name)) + worksheet.write(row, col + 1, info.phone) + worksheet.write(row, col + 2, info.passport) + worksheet.write(row, col + 3, info.mail) + row += 1 + workbook.close() + + +def save_mails_to_db(): + excel_reader = ExcelHelper() + emails = excel_reader.read_email_pojo("/Users/lpan/Downloads/aol_mails_21.xlsx") + print(emails) + for mail in emails: + MONGO_STORE_MANAGER.save_destinary_emails(mail) + + +if __name__ == '__main__': + excel_reader = ExcelHelper() + excel_reader.read_user_agens() + # contacts = excel_reader.read_names("/Users/lpan/Documents/rdv/backup_500.xlsx") + # print(contacts) + # write_new_contacts_to_excel(valid_contacts=contacts) + + # save_mails_to_db() + # for mail in excel_reader.read_mails_and_pwd(): + # MONGO_STORE_MANAGER.insert_email(mail) + # for i in range(1, 64): + # print(get_random_phone_numbers()) diff --git a/src/workers/commandor_page.py b/src/workers/commandor_page.py index 75a65ce..b8ba279 100644 --- a/src/workers/commandor_page.py +++ b/src/workers/commandor_page.py @@ -1,410 +1,461 @@ -import datetime -import logging -import random -import re -import sys -import threading -import time -import traceback -from typing import Union - -from src import params, definitions -from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.pojo.ModeEnum import ModeEnum -from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType -from src.pojo.contact_pojo import ContactPojo -from src.proxy.proxy_type import ProxyType -from src.workers.SolveCaptch import SolveCaptcha -from src.workers.TlsPlaywright import TlsPlaywright - -RDV_URL = "https://rendezvousparis.hermes.com/client/register" - -# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" -# RDV_URL = "https://api.ipify.org" -# RDV_URL ="https://bot.sannysoft.com/" -REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+" -otp_value = None -OTP_FIELD_ID = "#sms_code" -MESSAGE_FIELD_CLASS = ".message" -BLANK_URL = "about:blank" -CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered" -CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions." -MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail." -DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today." -DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourd’hui." -TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests" -TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes" -CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again" -CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué" -TIME_OUT = 10 * 60 * 1000 # 10 mins -OTP_TIMEOUT = 240 -PAGE_TIMEOUT = 40000 - - -def get_random_wait_time() -> float: - wait_time = random.randint(0, 10) / 10.0 * 1 - return wait_time - - -class CommandorPage: - tls = TlsPlaywright() - - def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA, - mode: ModeEnum = ModeEnum.MANUAL, headless=False): - self.otp_value = None - self.is_finished = False - self.contact = contact - self.contact.phone = self.contact.phone.replace(".0", "") # remove the .0 if the Excel format is not correct - self.logger = logging.getLogger("约会页面:" + str(self.contact.phone)) - self.proxy_type = proxy_type - self.is_event_sent = False - self.is_captcha_in_error = False - self.is_filling_fields = False - self.headless = headless - self.appointment_mode = mode - # 0: random - # 1: faubourg - # 2: George - # 3: Sèvres - self.store_map = { - 1: "faubourg", - 2: "georgev", - 3: "sevres" - } - self.store_type = store_type - - def on_success(self, result: ReserveResultPojo): - self.logger.info("on_success called.") - self.is_finished = True - if not self.is_event_sent: - self.logger.info("will send successful event") - self.logger.info(result) - params.oracle_log_sender.send_appoint_result(result) - self.is_event_sent = True - - def timeout_occurred(self): - params.oracle_log_sender.send_timeout_log(self.contact) - self.logger.info("will close timeout modem") - self.termine() - - def _run(self, proxy): - self.logger.info("will start browser") - self.on_success_listener = on_success - # reset otp_value to None - self.otp_value = None - devices = random.choice(params.DEVICES) - first_page = None - while first_page is None: - first_page = self.start_browser(proxy, self.tls.playwright, devices) - proxy = params.get_proxy(self.proxy_type) - # self.thread_event = e - otp_input = self.page.locator(OTP_FIELD_ID) - otp_input.wait_for(state='visible', timeout=TIME_OUT) - self.logger.info("timeout") - self.termine() - - def fill_fields(self): - if not self.is_filling_fields: - self.is_filling_fields = True - self.logger.info("填充信息: " + str(self.contact.phone)) - self._set_name(self.contact.last_name, self.contact.first_name) - self._setPhoneCountryAndStore() - self._set_phone_number("0" + str(self.contact.phone)) - self._set_email(self.contact.mail) - self._set_id_number(self.contact.passport) - self._checkCgu() - if self.appointment_mode == ModeEnum.AUTOMATIC: - self.resolve_captcha() - self.is_filling_fields = False - - def start_browser(self, proxy, pwright, device) -> Union[str, None]: - try: - self.browser = pwright.firefox.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) - # userAgent = random.choice(params.firefox_user_agent_list) - simulated_mobile = pwright.devices[device] - userAgent = simulated_mobile['user_agent'] - print("user_agent is " + userAgent) - # context = self.browser.new_context(**simulated_mobile, locale='fr-FR') - context = self.browser.new_context(user_agent=userAgent, locale='fr-FR') - self.page = context.new_page() - # hide webdriver information - self.page.add_init_script("""() => { - Object.defineProperty(navigator,'webdriver',{get: () => undefined}); - Object.defineProperty(navigator, 'platform', { - get: () => { - return "iPhone"; - }}); - } - """) - self.page.on("load", self._on_page_loaded) - self.page.on("response", self.handle_response) - self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT) - return self.page.content() - # self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) - # self.logger.info("模拟设备: " + device) - # simulated_mobile = pwright.devices[device] - # context = self.browser.new_context(**simulated_mobile, locale='fr-FR') - # self.page = context.new_page() - # # hide webdriver information - # self.page.add_init_script("""() => { - # Object.defineProperty(navigator,'webdriver',{get: () => undefined}); - # Object.defineProperty(navigator, 'platform', { - # get: () => { - # return "iPhone"; - # }}); - # } - # """) - # self.page.on("load", self._on_page_loaded) - # self.page.on("response", self.handle_response) - # self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT) - # captcha_url = "geo.captcha-delivery.com/captcha" - # if captcha_url in self.page.content(): - # self.logger.info("will close browser") - # self.browser.close() - # return None - # else: - # return self.page.content() - except Exception as error: - params.oracle_log_sender.send_error(str(error)) - traceback.print_exc(*sys.exc_info()) - self.logger.exception(error) - self.logger.info("will close browser") - self.browser.close() - return None - - def handle_response(self, response): - pattern = re.compile(REGEX_RDV_URL) - if pattern.match(response.url): - self.logger.info("result url found: " + response.url) - # self.publish_message_to_queue(self.contact, PublishType.PENDING, response.url) - - def start_page(self, proxy): - e = threading.Event() - self._run(proxy) - - def _on_page_loaded(self): - # time.sleep(40000) - self.logger.info("页面加载完毕") - self.logger.info("url is " + self.page.url) - captcha_url = "geo.captcha-delivery.com/captcha" - if captcha_url not in self.page.content(): - if self.page.url == RDV_URL: - self.fill_fields() - try: - message = self.page.content() - if CONFIRMED_MESSAGE_FR in message or MESSAGE_URL_VALIDATION_FR in message: - # publish the successful message - self.publish_message_to_queue(self.contact, PublishType.SUCCESS, self.page.url) - self.get_errors() - except Exception as error: - self.logger.error(error) - - def on_document_loaded(self): - self.logger.info("on_document_loaded called") - - def _setPhoneCountryAndStore(self): - try: - if self.store_type == 0: - self.page.evaluate("""()=>{ - //document.getElementById("phone_country").focus(); - document.getElementById("phone_country").value = \"FR\" }""") - else: - store_to_choose = self.store_map[self.store_type] - self.page.evaluate("""(store_to_choose)=>{ - document.getElementById("prefer").value = store_to_choose; - //document.getElementById("phone_country").focus(); - document.getElementById("phone_country").value = \"FR\" }""", store_to_choose) - except Exception as error: - self.logger.error(error) - - def _set_phone_number(self, phoneNumber): - time.sleep(get_random_wait_time()) - try: - self.page.evaluate("""(phoneNumber)=>document.getElementById("phone_number").value =phoneNumber""", - phoneNumber) - except Exception as error: - self.logger.error(error) - - def _set_name(self, lastName, firstName): - time.sleep(get_random_wait_time()) - try: - self.page.evaluate("""(name)=> { - let surname = document.getElementById("surname"); - if(surname.value.length == 0){ - // surname.focus(); - surname.value = name.lastName; - document.getElementById("name").focus(); - document.getElementById("name").value = name.firstName - }} - """, {'lastName': lastName, 'firstName': firstName}) - except Exception as error: - self.logger.error(error) - - def get_errors(self): - # send error result - if self.page.url != BLANK_URL: - # no need to push blank url to db - if self.page.url != RDV_URL: - # no need to push RDV url to db - self.publish_message_to_queue(self.contact, PublishType.ERROR, self.page.url) - try: - items = self.page.query_selector("div.alert") - if items: - erro_content = items.inner_html() - self.logger.info("错误:" + erro_content) - self._handle_errors(erro_content) - except Exception as ext: - self.logger.error(ext) - - def _handle_errors(self, erro_content: str): - if DOUBLE_REQUEST_ERROR_MESSAGE in erro_content or DOUBLE_REQUEST_ERROR_MESSAGE_FR in erro_content: - # this email has been already used - if not self.is_finished: - params.oracle_log_sender.send_double_data_error(self.contact) - MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact) - self.is_finished = True - self.termine() - elif TOO_MANY_REQUEST_ERROR_MESSAGE in erro_content or TOO_MANY_REQUEST_ERROR_MESSAGE_FR in erro_content: - # this email is in black list - if not self.is_finished: - params.oracle_log_sender.send_too_many_error(self.contact) - MONGO_STORE_MANAGER.insert_blacklist_contact(self.contact) - self.is_finished = True - self.termine() - elif CAPTCHA_ERROR_MESSAGE in erro_content or CAPTCHA_ERROR_MESSAGE_FR in erro_content: - # this email has been already used - self.is_captcha_in_error = True - if not self.is_finished: - # save the error to database with contact info - self.handle_captcha_error() - self.is_finished = True - # no need to retry captcha, if retry ,will generate DOUBLE_REQUEST_ERROR_MESSAGE - self.termine() - # self.resolve_captcha() - - def _set_email(self, email): - time.sleep(get_random_wait_time()) - try: - self.page.evaluate("""(email)=>{ - let emailElement = document.getElementById("email") - if(emailElement.value.length == 0){ - emailElement.focus(); - document.getElementById("email").value = email;}}""", email) - except Exception as error: - self.logger.error(error) - - def _set_id_number(self, id): - time.sleep(get_random_wait_time()) - try: - self.page.evaluate(""" (id) =>{ - document.getElementById("passport_id").focus(); - document.getElementById("passport_id").value = id}""", id) - except Exception as error: - self.logger.error(error) - - def _checkCgu(self): - try: - self.page.evaluate(""" - document.getElementById("cgu").focus(); - document.getElementById("cgu").checked = true; - document.getElementById("processing").focus(); - document.getElementById("processing").checked = true""") - except Exception as error: - self.logger.error(error) - - def clickOnValidBtn(self): - time.sleep(get_random_wait_time()) - try: - self.page.evaluate("""document.getElementsByClassName("btn")[0].focus();""") - time.sleep(get_random_wait_time()) - self.page.evaluate(""" - document.getElementsByClassName("btn")[0].click();""") - except Exception as error: - self.logger.error(error) - - def fill_otp(self, otp: str): - self.page.focus(OTP_FIELD_ID) - time.sleep(get_random_wait_time()) - self.page.fill(OTP_FIELD_ID, otp) - - def termine(self): - self.logger.info("will close browser") - time.sleep(1) - self.browser.close() - - def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): - # create the message - id = url.split("/")[-1] - result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url, - firstName=contact.first_name, lastName=contact.last_name, email=contact.mail, - passport=contact.passport, ccid=contact.ccid) - result.id = id - result.store_type = self.store_type - definitions.firebase_store_manager.save(result) - collection_name = str(datetime.date.today()) - MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result) - MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact) - MONGO_STORE_MANAGER.remove_contact_from_black_list(self.contact) - if status is PublishType.SUCCESS: - self.on_success(result) - time.sleep(2) - self.browser.close() - - def resolve_captcha(self): - self.captcha_solver = SolveCaptcha(self.page) - self.captcha_solver.start(self.fill_captcha_solution) - - def fill_captcha_solution(self, solution): - self.logger.info("will input solution") - try: - self.page.evaluate("""(solution)=>{ - document.getElementById("g-recaptcha-response").innerHTML=solution;}""", solution) - self.logger.info("will click on valid btn") - self.clickOnValidBtn() - # wait for 20s - time.sleep(20) - if not self.is_finished: - if not self.is_captcha_in_error: - self.clickOnValidBtn() - else: - self.is_captcha_in_error = False - - except Exception as error: - self.logger.error(error) - self.page.reload(timeout=PAGE_TIMEOUT) - - def handle_captcha_error(self): - MONGO_STORE_MANAGER.insert_captcha_error_contact(self.contact) - params.oracle_log_sender.send_captcha_error(self.contact) - - -def on_success(result: ReserveResultPojo): - pass - - -def launch_page(): - contact = ContactPojo(phone_number="+33758912245", passport_number="82546975", last_name="XU", - first_name="xingzhen", - mail="ColbyPatel653@gmail.com", ccid="", position=0) - page = CommandorPage(contact, store_type=1) - return page.start_page(params.get_proxy(ProxyType.BRIGHT_DATA)) - - -def wait_for_otp(event: threading.Event, commandor: CommandorPage): - sec = input("Press Enter otp to continue...\n") - print("input otp is: " + sec) - commandor.otp_value = sec - event.set() - - -if __name__ == '__main__': - launch_page() - # time = get_random_wait_time() - # print(time) - # import urllib.request - # - # proxy = urllib.request.ProxyHandler({'https': 'http://panleicim-res-fr-121:94sY7zwBG13i@gw.ntnt.io:5959'}) - # opener = urllib.request.build_opener(proxy) - # urllib.request.install_opener(opener) - # content = urllib.request.urlopen('https://api.ipify.org').read() - # print(content) +import datetime +import logging +import random +import re +import sys +import threading +import time +import traceback +from typing import Union + +from anticaptchaofficial.antigatetask import antigateTask +from playwright._impl._api_structures import SetCookieParam +from playwright_stealth import stealth_sync + +from src import params, definitions +from src.db.mongo_manager import MONGO_STORE_MANAGER +from src.pojo.ModeEnum import ModeEnum +from src.pojo.ReserveResultPojo import ReserveResultPojo, PublishType +from src.pojo.contact_pojo import ContactPojo +from src.proxy.proxy_type import ProxyType +from src.workers.SolveCaptch import SolveCaptcha +from src.workers.TlsPlaywright import TlsPlaywright + +RDV_URL = "https://rendezvousparis.hermes.com/client/register" + +# RDV_URL = "file:///Users/lpan/Downloads/test_appointment.html" +# RDV_URL = "https://api.ipify.org" +# RDV_URL ="https://bot.sannysoft.com/" +REGEX_RDV_URL = "https:\/\/rendezvousparis\.hermes\.com\/client\/register\/[A-Z0-9]+" +otp_value = None +OTP_FIELD_ID = "#sms_code" +MESSAGE_FIELD_CLASS = ".message" +BLANK_URL = "about:blank" +CONFIRMED_MESSAGE = "Your request for a Leather Goods appointment has been registered" +CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions." +MESSAGE_URL_VALIDATION_FR = "Nous avons envoyé un lien par e-mail." +DOUBLE_REQUEST_ERROR_MESSAGE = "A request with the same data has already been validated today." +DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourd’hui." +TOO_MANY_REQUEST_ERROR_MESSAGE = "Due to a large number of requests" +TOO_MANY_REQUEST_ERROR_MESSAGE_FR = "Suite à un trop grand nombre de demandes" +CAPTCHA_ERROR_MESSAGE = "Error verifying captcha, please try again" +CAPTCHA_ERROR_MESSAGE_FR = "La vérification du captcha a échoué" +TIME_OUT = 10 * 60 * 1000 # 10 mins +OTP_TIMEOUT = 240 +PAGE_TIMEOUT = 40000 + + +def get_random_wait_time() -> float: + wait_time = random.randint(0, 10) / 10.0 * 1 + return wait_time + + +class CommandorPage: + tls = TlsPlaywright() + + def __init__(self, contact: ContactPojo, store_type=0, proxy_type=ProxyType.BRIGHT_DATA, + mode: ModeEnum = ModeEnum.MANUAL, headless=False): + self.otp_value = None + self.is_finished = False + self.current_context = None + self.contact = contact + self.contact.phone = self.contact.phone.replace(".0", "") # remove the .0 if the Excel format is not correct + self.logger = logging.getLogger("约会页面:" + str(self.contact.phone)) + self.proxy_type = proxy_type + self.is_event_sent = False + self.is_captcha_in_error = False + self.is_filling_fields = False + self.headless = headless + self.appointment_mode = mode + # 0: random + # 1: faubourg + # 2: George + # 3: Sèvres + self.store_map = { + 1: "faubourg", + 2: "georgev", + 3: "sevres" + } + self.store_type = store_type + + def on_success(self, result: ReserveResultPojo): + self.logger.info("on_success called.") + self.is_finished = True + if not self.is_event_sent: + self.logger.info("will send successful event") + self.logger.info(result) + params.oracle_log_sender.send_appoint_result(result) + self.is_event_sent = True + + def timeout_occurred(self): + params.oracle_log_sender.send_timeout_log(self.contact) + self.logger.info("will close timeout modem") + self.termine() + + def _run(self, proxy): + self.logger.info("will start browser") + self.on_success_listener = on_success + # reset otp_value to None + self.otp_value = None + devices = random.choice(params.DEVICES) + first_page = None + while first_page is None: + first_page = self.start_browser(proxy, self.tls.playwright, devices) + proxy = params.get_proxy(self.proxy_type) + # self.thread_event = e + otp_input = self.page.locator(OTP_FIELD_ID) + otp_input.wait_for(state='visible', timeout=TIME_OUT) + self.logger.info("timeout") + self.termine() + + def fill_fields(self): + if not self.is_filling_fields: + self.is_filling_fields = True + self.logger.info("填充信息: " + str(self.contact.phone)) + self._set_name(self.contact.last_name, self.contact.first_name) + self._setPhoneCountryAndStore() + self._set_phone_number("0" + str(self.contact.phone)) + self._set_email(self.contact.mail) + self._set_id_number(self.contact.passport) + self._checkCgu() + if self.appointment_mode == ModeEnum.AUTOMATIC: + self.resolve_captcha() + self.is_filling_fields = False + + def start_browser(self, proxy, pwright, device) -> Union[str, None]: + try: + self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) + # userAgent = random.choice(params.firefox_user_agent_list) + simulated_mobile = pwright.devices[device] + userAgent = simulated_mobile['user_agent'] + print("user_agent is " + userAgent) + # context = self.browser.new_context(**simulated_mobile, locale='fr-FR') + context = self.browser.new_context(**simulated_mobile, locale='fr-FR') + self.current_context = context + self.create_and_config_page(context) + return self.page.content() + # self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) + # self.logger.info("模拟设备: " + device) + # simulated_mobile = pwright.devices[device] + # context = self.browser.new_context(**simulated_mobile, locale='fr-FR') + # self.page = context.new_page() + # # hide webdriver information + # self.page.add_init_script("""() => { + # Object.defineProperty(navigator,'webdriver',{get: () => undefined}); + # Object.defineProperty(navigator, 'platform', { + # get: () => { + # return "iPhone"; + # }}); + # } + # """) + # self.page.on("load", self._on_page_loaded) + # self.page.on("response", self.handle_response) + # self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT) + # captcha_url = "geo.captcha-delivery.com/captcha" + # if captcha_url in self.page.content(): + # self.logger.info("will close browser") + # self.browser.close() + # return None + # else: + # return self.page.content() + except Exception as error: + params.oracle_log_sender.send_error(str(error)) + traceback.print_exc(*sys.exc_info()) + self.logger.exception(error) + self.logger.info("will close browser") + self.browser.close() + return None + + def create_and_config_page(self, context): + self.page = context.new_page() + stealth_sync(self.page) + # hide webdriver information + self.page.add_init_script("""() => { + Object.defineProperty(navigator,'webdriver',{get: () => undefined}); + Object.defineProperty(navigator, 'platform', { + get: () => { + return "iPhone"; + }}); + } + """) + self.page.on("load", self._on_page_loaded) + self.page.on("response", self.handle_response) + self.page.goto(RDV_URL, timeout=PAGE_TIMEOUT) + + def handle_response(self, response): + pattern = re.compile(REGEX_RDV_URL) + if pattern.match(response.url): + self.logger.info("result url found: " + response.url) + # self.publish_message_to_queue(self.contact, PublishType.PENDING, response.url) + + def start_page(self, proxy): + e = threading.Event() + self._run(proxy) + + def solve_datadome_captcha(self): + print("solve_datadome_captcha") + solver = antigateTask() + solver.set_verbose(1) + solver.set_key("ede6a69396fc961af351e7c8ffda9059") + solver.set_website_url(RDV_URL) + solver.set_template_name("Anti-bot screen bypass") + solver.set_variables({ + "css_selector": ".captcha__human__container" + }) + result = solver.solve_and_return_solution() + if result != 0: + cookies, localStorage, fingerprint, url, domain = result["cookies"], result["localStorage"], result[ + "fingerprint"], result["url"], result["domain"] + print("cookies: ", cookies) + print("localStorage: ", localStorage) + print("fingerprint: ", fingerprint) + print("url: " + url) + print("domain: " + domain) + # add cookies to playwright + cookie_list = [] + cookie_list.append(SetCookieParam(name='datadome', value=cookies['datadome'], url=url)) + self.page.context.add_cookies(cookie_list) + self.config_page_with_fingerprint(fingerprint) + self.page.reload() + else: + print("task finished with error " + solver.error_code) + + def _on_page_loaded(self): + # time.sleep(40000) + self.logger.info("页面加载完毕") + self.logger.info("url is " + self.page.url) + captcha_url = "geo.captcha-delivery.com/captcha" + if captcha_url not in self.page.content(): + if self.page.url == RDV_URL: + self.fill_fields() + try: + message = self.page.content() + if CONFIRMED_MESSAGE_FR in message or MESSAGE_URL_VALIDATION_FR in message: + # publish the successful message + self.publish_message_to_queue(self.contact, PublishType.SUCCESS, self.page.url) + self.get_errors() + except Exception as error: + self.logger.error(error) + # else: + # self.solve_datadome_captcha() + + def on_document_loaded(self): + self.logger.info("on_document_loaded called") + + def _setPhoneCountryAndStore(self): + try: + if self.store_type == 0: + self.page.evaluate("""()=>{ + //document.getElementById("phone_country").focus(); + document.getElementById("phone_country").value = \"FR\" }""") + else: + store_to_choose = self.store_map[self.store_type] + self.page.evaluate("""(store_to_choose)=>{ + document.getElementById("prefer").value = store_to_choose; + //document.getElementById("phone_country").focus(); + document.getElementById("phone_country").value = \"FR\" }""", store_to_choose) + except Exception as error: + self.logger.error(error) + + def _set_phone_number(self, phoneNumber): + time.sleep(get_random_wait_time()) + try: + self.page.evaluate("""(phoneNumber)=>document.getElementById("phone_number").value =phoneNumber""", + phoneNumber) + except Exception as error: + self.logger.error(error) + + def _set_name(self, lastName, firstName): + time.sleep(get_random_wait_time()) + try: + self.page.evaluate("""(name)=> { + let surname = document.getElementById("surname"); + if(surname.value.length == 0){ + // surname.focus(); + surname.value = name.lastName; + document.getElementById("name").focus(); + document.getElementById("name").value = name.firstName + }} + """, {'lastName': lastName, 'firstName': firstName}) + except Exception as error: + self.logger.error(error) + + def get_errors(self): + # send error result + if self.page.url != BLANK_URL: + # no need to push blank url to db + if self.page.url != RDV_URL: + # no need to push RDV url to db + self.publish_message_to_queue(self.contact, PublishType.ERROR, self.page.url) + try: + items = self.page.query_selector("div.alert") + if items: + erro_content = items.inner_html() + self.logger.info("错误:" + erro_content) + self._handle_errors(erro_content) + except Exception as ext: + self.logger.error(ext) + + def _handle_errors(self, erro_content: str): + if DOUBLE_REQUEST_ERROR_MESSAGE in erro_content or DOUBLE_REQUEST_ERROR_MESSAGE_FR in erro_content: + # this email has been already used + if not self.is_finished: + params.oracle_log_sender.send_double_data_error(self.contact) + MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact) + self.is_finished = True + self.termine() + elif TOO_MANY_REQUEST_ERROR_MESSAGE in erro_content or TOO_MANY_REQUEST_ERROR_MESSAGE_FR in erro_content: + # this email is in black list + if not self.is_finished: + params.oracle_log_sender.send_too_many_error(self.contact) + MONGO_STORE_MANAGER.insert_blacklist_contact(self.contact) + self.is_finished = True + self.termine() + elif CAPTCHA_ERROR_MESSAGE in erro_content or CAPTCHA_ERROR_MESSAGE_FR in erro_content: + # this email has been already used + self.is_captcha_in_error = True + if not self.is_finished: + # save the error to database with contact info + self.handle_captcha_error() + self.is_finished = True + # no need to retry captcha, if retry ,will generate DOUBLE_REQUEST_ERROR_MESSAGE + self.termine() + # self.resolve_captcha() + + def _set_email(self, email): + time.sleep(get_random_wait_time()) + try: + self.page.evaluate("""(email)=>{ + let emailElement = document.getElementById("email") + if(emailElement.value.length == 0){ + emailElement.focus(); + document.getElementById("email").value = email;}}""", email) + except Exception as error: + self.logger.error(error) + + def _set_id_number(self, id): + time.sleep(get_random_wait_time()) + try: + self.page.evaluate(""" (id) =>{ + document.getElementById("passport_id").focus(); + document.getElementById("passport_id").value = id}""", id) + except Exception as error: + self.logger.error(error) + + def _checkCgu(self): + try: + self.page.evaluate(""" + document.getElementById("cgu").focus(); + document.getElementById("cgu").checked = true; + document.getElementById("processing").focus(); + document.getElementById("processing").checked = true""") + except Exception as error: + self.logger.error(error) + + def clickOnValidBtn(self): + time.sleep(get_random_wait_time()) + try: + self.page.evaluate("""document.getElementsByClassName("btn")[0].focus();""") + time.sleep(get_random_wait_time()) + self.page.evaluate(""" + document.getElementsByClassName("btn")[0].click();""") + except Exception as error: + self.logger.error(error) + + def fill_otp(self, otp: str): + self.page.focus(OTP_FIELD_ID) + time.sleep(get_random_wait_time()) + self.page.fill(OTP_FIELD_ID, otp) + + def termine(self): + self.logger.info("will close browser") + time.sleep(1) + self.browser.close() + + def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): + # create the message + id = url.split("/")[-1] + result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url, + firstName=contact.first_name, lastName=contact.last_name, email=contact.mail, + passport=contact.passport, ccid=contact.ccid) + result.id = id + result.store_type = self.store_type + definitions.firebase_store_manager.save(result) + collection_name = str(datetime.date.today()) + MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result) + MONGO_STORE_MANAGER.delete_captcha_error_contact_for_current_day(self.contact) + MONGO_STORE_MANAGER.remove_contact_from_black_list(self.contact) + if status is PublishType.SUCCESS: + self.on_success(result) + time.sleep(2) + self.browser.close() + + def resolve_captcha(self): + self.captcha_solver = SolveCaptcha(self.page) + self.captcha_solver.start(self.fill_captcha_solution) + + def fill_captcha_solution(self, solution): + self.logger.info("will input solution") + try: + self.page.evaluate("""(solution)=>{ + document.getElementById("g-recaptcha-response").innerHTML=solution;}""", solution) + self.logger.info("will click on valid btn") + self.clickOnValidBtn() + # wait for 20s + time.sleep(20) + if not self.is_finished: + if not self.is_captcha_in_error: + self.clickOnValidBtn() + else: + self.is_captcha_in_error = False + + except Exception as error: + self.logger.error(error) + self.page.reload(timeout=PAGE_TIMEOUT) + + def handle_captcha_error(self): + MONGO_STORE_MANAGER.insert_captcha_error_contact(self.contact) + params.oracle_log_sender.send_captcha_error(self.contact) + + def config_page_with_fingerprint(self, fingerprint): + self.page.add_init_script("""() => { + Object.defineProperty(navigator,'webdriver',{get: () => undefined}); + Object.defineProperty(navigator, 'platform', { + get: () => { + return "iPhone"; + }}); + + } + """) + + +def on_success(result: ReserveResultPojo): + pass + + +def launch_page(): + contact = ContactPojo(phone_number="+33758912245", passport_number="82546975", last_name="XU", + first_name="xingzhen", + mail="ColbyPatel653@gmail.com", ccid="", position=0) + page = CommandorPage(contact, store_type=1) + return page.start_page(params.get_proxy(ProxyType.BRIGHT_DATA)) + + +def wait_for_otp(event: threading.Event, commandor: CommandorPage): + sec = input("Press Enter otp to continue...\n") + print("input otp is: " + sec) + commandor.otp_value = sec + event.set() + + +if __name__ == '__main__': + launch_page() + # time = get_random_wait_time() + # print(time) + # import urllib.request + # + # proxy = urllib.request.ProxyHandler({'https': 'http://panleicim-res-fr-121:94sY7zwBG13i@gw.ntnt.io:5959'}) + # opener = urllib.request.build_opener(proxy) + # urllib.request.install_opener(opener) + # content = urllib.request.urlopen('https://api.ipify.org').read() + # print(content) diff --git a/src/workers/link_validator.py b/src/workers/link_validator.py index cb752e9..dc1ae06 100644 --- a/src/workers/link_validator.py +++ b/src/workers/link_validator.py @@ -1,135 +1,135 @@ -import logging -import random -import traceback -from typing import Union - -import sys -import time - -from src import params -from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.pojo.ReserveResultPojo import PublishType -from src.proxy.proxy_type import ProxyType -from src.workers.TlsPlaywright import TlsPlaywright - -OTP_FIELD_ID = "#sms_code" -TIME_OUT = 10 * 60 * 1000 # 10 mins -PAGE_TIMEOUT = 40000 -CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions." -SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci" -DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourd’hui." - - -class LinkValidator: - tls = TlsPlaywright() - - def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False): - self.is_finished = False - self.link = link - self.proxy_type = proxy_type - self.is_event_sent = False - self.is_captcha_in_error = False - self.is_filling_fields = False - self.headless = headless - self.logger = logging.getLogger("LinkValidator") - - def on_success(self): - self.logger.info("on_success called.") - self.is_finished = True - if not self.is_event_sent: - self.logger.info("will send successful event") - params.oracle_log_sender.send_url_validation_result() - self.is_event_sent = True - - def timeout_occurred(self): - params.oracle_log_sender.send_timeout_log(self.link) - self.logger.info("will close timeout modem") - self.termine() - - def _run(self, proxy): - self.logger.info("will start browser") - # reset otp_value to None - devices = random.choice(params.DEVICES) - first_page = None - while first_page is None: - first_page = self.start_browser(proxy, self.tls.playwright, devices) - proxy = params.get_proxy(self.proxy_type) - otp_input = self.page.locator(OTP_FIELD_ID) - otp_input.wait_for(state='visible', timeout=TIME_OUT) - self.logger.info("timeout") - self.termine() - - def start_browser(self, proxy, pwright, device) -> Union[str, None]: - try: - self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) - self.logger.info("模拟设备: " + device) - simulated_mobile = pwright.devices[device] - context = self.browser.new_context(**simulated_mobile, locale='fr-FR') - self.page = context.new_page() - # hide webdriver information - self.page.add_init_script("""() => { - Object.defineProperty(navigator,'webdriver',{get: () => undefined}); - Object.defineProperty(navigator, 'platform', { - get: () => { - return "iPhone"; - }}); -} - """) - self.page.on("load", self._on_page_loaded) - self.page.goto(self.link, timeout=PAGE_TIMEOUT) - captcha_url = "geo.captcha-delivery.com/captcha" - if captcha_url in self.page.content(): - self.logger.info("will close browser") - self.browser.close() - return None - else: - return self.page.content() - except Exception as error: - params.oracle_log_sender.send_error(str(error)) - traceback.print_exc(*sys.exc_info()) - self.logger.exception(error) - self.logger.info("will close browser") - self.browser.close() - return None - - def start_page(self, proxy, headless=False): - self.headless = headless - self._run(proxy) - - def _on_page_loaded(self): - self.logger.info("页面加载完毕") - self.logger.info("url is " + self.page.url) - try: - message = self.page.content() - if CONFIRMED_MESSAGE_FR in message: - # publish the successful message - self.publish_message_to_queue(PublishType.SUCCESS) - elif SORRY_SENTENCE_FR in message: - # publish the successful message - self.publish_message_to_queue(PublishType.SUCCESS) - elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message: - # publish the successful message - self.publish_message_to_queue(PublishType.DUPLICATED) - except Exception as error: - self.logger.error(error) - - def on_document_loaded(self): - self.logger.info("on_document_loaded called") - - def _handle_errors(self, erro_content: str): - pass - - def termine(self): - self.logger.info("will close browser") - time.sleep(1) - self.browser.close() - - def publish_message_to_queue(self, status: PublishType): - # create the message - if status is PublishType.SUCCESS: - MONGO_STORE_MANAGER.link_validated_for_result(self.page.url) - else: - MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name) - self.on_success() - time.sleep(2) - self.browser.close() +import logging +import random +import traceback +from typing import Union + +import sys +import time + +from src import params +from src.db.mongo_manager import MONGO_STORE_MANAGER +from src.pojo.ReserveResultPojo import PublishType +from src.proxy.proxy_type import ProxyType +from src.workers.TlsPlaywright import TlsPlaywright + +OTP_FIELD_ID = "#sms_code" +TIME_OUT = 10 * 60 * 1000 # 10 mins +PAGE_TIMEOUT = 40000 +CONFIRMED_MESSAGE_FR = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée et nous vous en remercions." +SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci" +DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déjà été validée aujourd’hui." + + +class LinkValidator: + tls = TlsPlaywright() + + def __init__(self, link: str, proxy_type=ProxyType.BRIGHT_DATA, headless=False): + self.is_finished = False + self.link = link + self.proxy_type = proxy_type + self.is_event_sent = False + self.is_captcha_in_error = False + self.is_filling_fields = False + self.headless = headless + self.logger = logging.getLogger("LinkValidator") + + def on_success(self): + self.logger.info("on_success called.") + self.is_finished = True + if not self.is_event_sent: + self.logger.info("will send successful event") + params.oracle_log_sender.send_url_validation_result() + self.is_event_sent = True + + def timeout_occurred(self): + params.oracle_log_sender.send_timeout_log(self.link) + self.logger.info("will close timeout modem") + self.termine() + + def _run(self, proxy): + self.logger.info("will start browser") + # reset otp_value to None + devices = random.choice(params.DEVICES) + first_page = None + while first_page is None: + first_page = self.start_browser(proxy, self.tls.playwright, devices) + proxy = params.get_proxy(self.proxy_type) + otp_input = self.page.locator(OTP_FIELD_ID) + otp_input.wait_for(state='visible', timeout=TIME_OUT) + self.logger.info("timeout") + self.termine() + + def start_browser(self, proxy, pwright, device) -> Union[str, None]: + try: + self.browser = pwright.webkit.launch(headless=self.headless, timeout=PAGE_TIMEOUT, proxy=proxy) + self.logger.info("模拟设备: " + device) + simulated_mobile = pwright.devices[device] + context = self.browser.new_context(**simulated_mobile, locale='fr-FR') + self.page = context.new_page() + # hide webdriver information + self.page.add_init_script("""() => { + Object.defineProperty(navigator,'webdriver',{get: () => undefined}); + Object.defineProperty(navigator, 'platform', { + get: () => { + return "iPhone"; + }}); +} + """) + self.page.on("load", self._on_page_loaded) + self.page.goto(self.link, timeout=PAGE_TIMEOUT) + captcha_url = "geo.captcha-delivery.com/captcha" + if captcha_url in self.page.content(): + self.logger.info("will close browser") + self.browser.close() + return None + else: + return self.page.content() + except Exception as error: + params.oracle_log_sender.send_error(str(error)) + traceback.print_exc(*sys.exc_info()) + self.logger.exception(error) + self.logger.info("will close browser") + self.browser.close() + return None + + def start_page(self, proxy, headless=False): + self.headless = headless + self._run(proxy) + + def _on_page_loaded(self): + self.logger.info("页面加载完毕") + self.logger.info("url is " + self.page.url) + try: + message = self.page.content() + if CONFIRMED_MESSAGE_FR in message: + # publish the successful message + self.publish_message_to_queue(PublishType.SUCCESS) + elif SORRY_SENTENCE_FR in message: + # publish the successful message + self.publish_message_to_queue(PublishType.SUCCESS) + elif DOUBLE_REQUEST_ERROR_MESSAGE_FR in message: + # publish the successful message + self.publish_message_to_queue(PublishType.DUPLICATED) + except Exception as error: + self.logger.error(error) + + def on_document_loaded(self): + self.logger.info("on_document_loaded called") + + def _handle_errors(self, erro_content: str): + pass + + def termine(self): + self.logger.info("will close browser") + time.sleep(1) + self.browser.close() + + def publish_message_to_queue(self, status: PublishType): + # create the message + if status is PublishType.SUCCESS: + MONGO_STORE_MANAGER.link_validated_for_result(self.page.url) + else: + MONGO_STORE_MANAGER.link_validated_for_result(self.page.url, state=status.name) + self.on_success() + time.sleep(2) + self.browser.close()