diff --git a/src/mail/mail_address_validator.py b/src/mail/mail_address_validator.py index aee8a1c..2da626d 100644 --- a/src/mail/mail_address_validator.py +++ b/src/mail/mail_address_validator.py @@ -5,6 +5,7 @@ from imapclient import IMAPClient from src.db.mongo_manager import MONGO_STORE_MANAGER from src.mail.mail_constants import create_imap from src.pojo.mail.mail_pojo import MailAddress +from src.utils.excel_reader import ExcelHelper class MailAddressValidator(): @@ -49,8 +50,7 @@ def remove_invalid_email(): MONGO_STORE_MANAGER.remove_email_from_destination_email_list(mail) -def find_and_update_invalid_emails(): - mail_list = MONGO_STORE_MANAGER.get_destination_emails() +def find_and_update_invalid_emails(mail_list): # mail_address1 = MailAddress(mail="perrateke1983@onet.pl", password="8EQh#UuyMx8zVO9") # # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb") # # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm") @@ -67,4 +67,8 @@ def find_and_update_invalid_emails(): if __name__ == '__main__': # remove_invalid_email() - find_and_update_invalid_emails() + mail_list = MONGO_STORE_MANAGER.get_destination_emails() + # excel_reader = ExcelHelper() + # mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Downloads/hotmail_list.xlsx") + # print(email_list) + find_and_update_invalid_emails(mail_list) diff --git a/src/mail/mail_confirmation.py b/src/mail/mail_confirmation.py index 7b829f5..20971ba 100644 --- a/src/mail/mail_confirmation.py +++ b/src/mail/mail_confirmation.py @@ -9,11 +9,12 @@ from imapclient import IMAPClient from src.db.mirgration.migration_tools import migre_accepted_appointment from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap, show_folders +from src.mail.mail_constants import create_imap, show_folders from src.notification.AcceptedResultPojo import get_accepted_result_from from src.notification.mailer import Mailer from src.pojo.ResultEnum import ResultEnum -from src.pojo.mail.mail_pojo import MailPojo, MailAddress +from src.pojo.mail.mail_pojo import MailPojo +from src.utils.excel_reader import ExcelHelper CONFIRMATION_SUBJECT_FR = 'Votre rendez-vous est' CONFIRMATION_SUBJECT_EN = 'appointment is confirmed' @@ -155,15 +156,17 @@ def accept_appointment_found(accepted_result_list: list): def read_mails_and_find_confirmation_contacts(): mail_list = MONGO_STORE_MANAGER.get_destination_emails() + # excel_reader = ExcelHelper() + # mail_list =excel_reader.read_email_pojo(file_name="/Users/lpan/Desktop/hotmail_list.xlsx") # mail_address3 = MailAddress(mail="taibenchragu1978@onet.pl", password="2J)kyfNgyOZ") # mail_list = [mail_address3] mails_messages = [] # read all the emails with ThreadPoolExecutor(max_workers=200) as executor: for mail in mail_list: - if DOMAIN_HOTMAIL not in mail.mail: - mail_reader = MailConfirmationReader(mail.mail, mail.password) - executor.submit(mail_reader.read_emails, mails_messages) + # if DOMAIN_HOTMAIL not in mail.mail: + mail_reader = MailConfirmationReader(mail.mail, mail.password) + executor.submit(mail_reader.read_emails, mails_messages) accepted_appointment_list = [] if len(mails_messages) > 0: successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() diff --git a/src/mail/mail_reader.py b/src/mail/mail_reader.py index c7d3086..20ee28c 100644 --- a/src/mail/mail_reader.py +++ b/src/mail/mail_reader.py @@ -13,7 +13,9 @@ from src.db.mongo_manager import MONGO_STORE_MANAGER from src.logs.AppLogging import init_logger from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap from src.pojo.mail.mail_pojo import MailPojo, MailAddress +from src.utils.excel_reader import ExcelHelper from src.utils.timeutiles import is_time_between +from src.workers.link_validator import LinkValidator VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous' VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request' @@ -87,9 +89,9 @@ class MailReader(): folder=folder)) else: mail_list.extend(self._get_messages_from_folder_for_imapclient(imap)) - if DOMAIN_HOTMAIL in self.login: - mail_list.extend( - self._get_messages_from_folder_for_imapclient(imap, folder="Junk")) + # if DOMAIN_HOTMAIL in self.login: + # mail_list.extend( + # self._get_messages_from_folder_for_imapclient(imap, folder="Junk")) if not isImapClient: imap.close() imap.logout() @@ -245,8 +247,10 @@ def read_mails(): if is_time_between(time(7, 30), time(19, 30)): # get email address mail_list = MONGO_STORE_MANAGER.get_destination_emails() + # excel_reader = ExcelHelper() + # mail_list =excel_reader.read_email_pojo(file_name="/Users/lpan/Desktop/hotmail_list.xlsx") # mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq") - # mail_address1 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb") + # mail_address1 = MailAddress(mail="sayedyepesv@hotmail.com", password="JGc1UH41") # # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce") # mail_list = [mail_address1] successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() diff --git a/src/person_name/extract_name_with_pinyinlist.py b/src/person_name/extract_name_with_pinyinlist.py index 52d4e66..cbeb435 100644 --- a/src/person_name/extract_name_with_pinyinlist.py +++ b/src/person_name/extract_name_with_pinyinlist.py @@ -29,7 +29,28 @@ def get_better_list(list): # 关键词提取 def read_pinyin_list_from_file() -> list: - file2 = open('clean_list.txt', 'r') + file2 = open('all_new_name_list.txt', 'r') + lines = file2.readlines() + name_list = [] + count = 0 + for line in lines: + count += 1 + print("Line{}: {}".format(count, line.strip())) + name_list.append(line.strip()) + return name_list +def read_pinyin_first_name_from_file() -> list: + file2 = open('first_name_noDuplicates.txt', 'r') + lines = file2.readlines() + name_list = [] + count = 0 + for line in lines: + count += 1 + print("Line{}: {}".format(count, line.strip())) + name_list.append(line.strip()) + return name_list + +def read_pinyin_last_name_from_file() -> list: + file2 = open('last_name_noDuplicates.txt', 'r') lines = file2.readlines() name_list = [] count = 0 @@ -40,57 +61,88 @@ def read_pinyin_list_from_file() -> list: return name_list +def generate_new_list_from_old_name_list(): + all_last_name = read_pinyin_last_name_from_file() + all_first_name = read_pinyin_first_name_from_file() + # for name in all_name_list: + # last_name = name.split(" ")[0] + # first_name = name.split(" ")[-1] + # all_last_name.append(last_name) + # all_first_name.append(first_name) + f = open("all_new_name_list.txt", "w") + for last_name in all_last_name: + for first_name in all_first_name: + new_name = "{} {}\n".format(last_name, first_name) + f.write(str(new_name)) + f.close() + +def has_numbers(inputString): + return any(char.isdigit() for char in inputString) +def check_name(word_to_test, pinyin_name_list): + if "_" in word_to_test or "." in word_to_test: + return None + if has_numbers(word_to_test): + return None + for name in pinyin_name_list: + last_name = name.split(" ")[0] + first_name = name.split(" ")[-1] + full_name = last_name + first_name + full_name_inverse = first_name + last_name + if word_to_test.lower() in last_name.lower(): + return last_name, first_name + elif word_to_test.lower() in first_name.lower(): + return last_name, first_name + elif word_to_test.lower() in full_name.lower(): + return last_name, first_name + elif word_to_test.lower() in full_name_inverse.lower(): + return last_name, first_name + return None + + def generate_name_from_email(mail_address, pinyin_name_list): # key_words = HanLP.extractKeyword(mail_address, 2) # print(key_words) # setence = "".join(key_words) print("generate for " + mail_address) all_combins = get_ordered_combins(mail_address) + all_combins.sort(key=len, reverse=True) + print(all_combins) + for i in all_combins: word_to_test = "".join(i) print("word to test is " + word_to_test) - if len(word_to_test) >= 6: - for name in pinyin_name_list: - last_name = name.split(" ")[0] - first_name = name.split(" ")[-1] - full_name = last_name + first_name - full_name_inverse = first_name + last_name - if word_to_test in last_name: - return last_name, first_name - elif word_to_test in first_name: - return last_name, first_name - elif word_to_test in full_name: - return last_name, first_name - elif word_to_test in full_name_inverse: - return last_name, first_name - if len(word_to_test) >= 5: - for name in pinyin_name_list: - last_name = name.split(" ")[0] - first_name = name.split(" ")[-1] - full_name = last_name + first_name - full_name_inverse = first_name + last_name - if word_to_test in last_name: - return last_name, first_name - elif word_to_test in first_name: - return last_name, first_name - elif word_to_test in full_name: - return last_name, first_name - elif word_to_test in full_name_inverse: - return last_name, first_name + # if len(word_to_test) >= 11: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 10: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 9: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 8: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 7: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 6: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name + # if len(word_to_test) >= 5: + # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + # if found_name is not None: + # return found_name if len(word_to_test) >= 4: - for name in pinyin_name_list: - last_name = name.split(" ")[0] - first_name = name.split(" ")[-1] - full_name = last_name + first_name - full_name_inverse = first_name + last_name - if word_to_test in last_name: - return last_name, first_name - elif word_to_test in first_name: - return last_name, first_name - elif word_to_test in full_name: - return last_name, first_name - elif word_to_test in full_name_inverse: - return last_name, first_name + found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) + if found_name is not None: + return found_name # 选择不重复的 # if len(pinyin_name_list) > 3: @@ -124,7 +176,7 @@ def write_new_contacts_to_excel(valid_contacts: list): workbook.close() -def find_contact(generate_contacts: list, mail): +def find_contact(generate_contacts: list, mail, pinyin_name_list): contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="") spliteed = mail.mail.split("@") possible_name_list = generate_name_from_email(spliteed[0], pinyin_name_list) @@ -135,19 +187,24 @@ def find_contact(generate_contacts: list, mail): generate_contacts.append(contact) -if __name__ == '__main__': +def generate_contact_from_mail_list(): db_manager = MongoDbManager() # mail_list = db_manager.get_destination_emails()[6000:7000] # mail_list = db_manager.get_destination_emails()[3001:3200] - # mail_list = db_manager.get_destination_emails()[50:200] - excel_reader = ExcelHelper() - mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx") + mail_list = db_manager.get_destination_emails()[7570:7590] + # excel_reader = ExcelHelper() + # mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx") generate_contacts = [] pinyin_name_list = read_pinyin_list_from_file() random.shuffle(pinyin_name_list) - with ThreadPoolExecutor(max_workers=200) as executor: + with ThreadPoolExecutor(max_workers=500) as executor: for mail in mail_list: - executor.submit(find_contact, generate_contacts, mail) + executor.submit(find_contact, generate_contacts, mail, pinyin_name_list) # for mail in mail_list: # find_contact(generate_contacts, mail) write_new_contacts_to_excel(generate_contacts) + + +if __name__ == '__main__': + generate_contact_from_mail_list() + # generate_new_list_from_old_name_list() diff --git a/src/workers/link_validator.py b/src/workers/link_validator.py index f59554f..4228220 100644 --- a/src/workers/link_validator.py +++ b/src/workers/link_validator.py @@ -24,10 +24,9 @@ DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déj class LinkValidator: tls = TlsPlaywright() - def __init__(self, link: str, proxy_type=ProxyType.RESIDENTIAL, headless=False): + def __init__(self, link: str, headless=False): self.is_finished = False self.link = link - self.proxy_type = proxy_type self.is_event_sent = False self.is_captcha_in_error = False self.is_filling_fields = False @@ -50,10 +49,8 @@ class LinkValidator: def _run(self, proxy): self.logger.info("will start browser") # reset otp_value to None - devices = random.choice(params.DEVICES) - first_page = None # while first_page is None: - self.start_browser(proxy, self.tls.playwright, devices) + self.start_browser(self.tls.playwright) # proxy = params.get_proxy(self.proxy_type) otp_input = self.page.locator(OTP_FIELD_ID) otp_input.wait_for(state='visible', timeout=TIME_OUT)