new method to extract names
This commit is contained in:
@@ -5,6 +5,7 @@ from imapclient import IMAPClient
|
|||||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
from src.mail.mail_constants import create_imap
|
from src.mail.mail_constants import create_imap
|
||||||
from src.pojo.mail.mail_pojo import MailAddress
|
from src.pojo.mail.mail_pojo import MailAddress
|
||||||
|
from src.utils.excel_reader import ExcelHelper
|
||||||
|
|
||||||
|
|
||||||
class MailAddressValidator():
|
class MailAddressValidator():
|
||||||
@@ -49,8 +50,7 @@ def remove_invalid_email():
|
|||||||
MONGO_STORE_MANAGER.remove_email_from_destination_email_list(mail)
|
MONGO_STORE_MANAGER.remove_email_from_destination_email_list(mail)
|
||||||
|
|
||||||
|
|
||||||
def find_and_update_invalid_emails():
|
def find_and_update_invalid_emails(mail_list):
|
||||||
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
|
||||||
# mail_address1 = MailAddress(mail="perrateke1983@onet.pl", password="8EQh#UuyMx8zVO9")
|
# mail_address1 = MailAddress(mail="perrateke1983@onet.pl", password="8EQh#UuyMx8zVO9")
|
||||||
# # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb")
|
# # mail_address2 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb")
|
||||||
# # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm")
|
# # mail_address2 = MailAddress(mail="sdfgfhgf1986@aol.com", password="fjwcgvhxxlywqfwm")
|
||||||
@@ -67,4 +67,8 @@ def find_and_update_invalid_emails():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# remove_invalid_email()
|
# remove_invalid_email()
|
||||||
find_and_update_invalid_emails()
|
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||||
|
# excel_reader = ExcelHelper()
|
||||||
|
# mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Downloads/hotmail_list.xlsx")
|
||||||
|
# print(email_list)
|
||||||
|
find_and_update_invalid_emails(mail_list)
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ from imapclient import IMAPClient
|
|||||||
|
|
||||||
from src.db.mirgration.migration_tools import migre_accepted_appointment
|
from src.db.mirgration.migration_tools import migre_accepted_appointment
|
||||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap, show_folders
|
from src.mail.mail_constants import create_imap, show_folders
|
||||||
from src.notification.AcceptedResultPojo import get_accepted_result_from
|
from src.notification.AcceptedResultPojo import get_accepted_result_from
|
||||||
from src.notification.mailer import Mailer
|
from src.notification.mailer import Mailer
|
||||||
from src.pojo.ResultEnum import ResultEnum
|
from src.pojo.ResultEnum import ResultEnum
|
||||||
from src.pojo.mail.mail_pojo import MailPojo, MailAddress
|
from src.pojo.mail.mail_pojo import MailPojo
|
||||||
|
from src.utils.excel_reader import ExcelHelper
|
||||||
|
|
||||||
CONFIRMATION_SUBJECT_FR = 'Votre rendez-vous est'
|
CONFIRMATION_SUBJECT_FR = 'Votre rendez-vous est'
|
||||||
CONFIRMATION_SUBJECT_EN = 'appointment is confirmed'
|
CONFIRMATION_SUBJECT_EN = 'appointment is confirmed'
|
||||||
@@ -155,15 +156,17 @@ def accept_appointment_found(accepted_result_list: list):
|
|||||||
|
|
||||||
def read_mails_and_find_confirmation_contacts():
|
def read_mails_and_find_confirmation_contacts():
|
||||||
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||||
|
# excel_reader = ExcelHelper()
|
||||||
|
# mail_list =excel_reader.read_email_pojo(file_name="/Users/lpan/Desktop/hotmail_list.xlsx")
|
||||||
# mail_address3 = MailAddress(mail="taibenchragu1978@onet.pl", password="2J)kyfNgyOZ")
|
# mail_address3 = MailAddress(mail="taibenchragu1978@onet.pl", password="2J)kyfNgyOZ")
|
||||||
# mail_list = [mail_address3]
|
# mail_list = [mail_address3]
|
||||||
mails_messages = []
|
mails_messages = []
|
||||||
# read all the emails
|
# read all the emails
|
||||||
with ThreadPoolExecutor(max_workers=200) as executor:
|
with ThreadPoolExecutor(max_workers=200) as executor:
|
||||||
for mail in mail_list:
|
for mail in mail_list:
|
||||||
if DOMAIN_HOTMAIL not in mail.mail:
|
# if DOMAIN_HOTMAIL not in mail.mail:
|
||||||
mail_reader = MailConfirmationReader(mail.mail, mail.password)
|
mail_reader = MailConfirmationReader(mail.mail, mail.password)
|
||||||
executor.submit(mail_reader.read_emails, mails_messages)
|
executor.submit(mail_reader.read_emails, mails_messages)
|
||||||
accepted_appointment_list = []
|
accepted_appointment_list = []
|
||||||
if len(mails_messages) > 0:
|
if len(mails_messages) > 0:
|
||||||
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
|
|||||||
@@ -13,7 +13,9 @@ from src.db.mongo_manager import MONGO_STORE_MANAGER
|
|||||||
from src.logs.AppLogging import init_logger
|
from src.logs.AppLogging import init_logger
|
||||||
from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap
|
from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap
|
||||||
from src.pojo.mail.mail_pojo import MailPojo, MailAddress
|
from src.pojo.mail.mail_pojo import MailPojo, MailAddress
|
||||||
|
from src.utils.excel_reader import ExcelHelper
|
||||||
from src.utils.timeutiles import is_time_between
|
from src.utils.timeutiles import is_time_between
|
||||||
|
from src.workers.link_validator import LinkValidator
|
||||||
|
|
||||||
VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous'
|
VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous'
|
||||||
VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request'
|
VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request'
|
||||||
@@ -87,9 +89,9 @@ class MailReader():
|
|||||||
folder=folder))
|
folder=folder))
|
||||||
else:
|
else:
|
||||||
mail_list.extend(self._get_messages_from_folder_for_imapclient(imap))
|
mail_list.extend(self._get_messages_from_folder_for_imapclient(imap))
|
||||||
if DOMAIN_HOTMAIL in self.login:
|
# if DOMAIN_HOTMAIL in self.login:
|
||||||
mail_list.extend(
|
# mail_list.extend(
|
||||||
self._get_messages_from_folder_for_imapclient(imap, folder="Junk"))
|
# self._get_messages_from_folder_for_imapclient(imap, folder="Junk"))
|
||||||
if not isImapClient:
|
if not isImapClient:
|
||||||
imap.close()
|
imap.close()
|
||||||
imap.logout()
|
imap.logout()
|
||||||
@@ -245,8 +247,10 @@ def read_mails():
|
|||||||
if is_time_between(time(7, 30), time(19, 30)):
|
if is_time_between(time(7, 30), time(19, 30)):
|
||||||
# get email address
|
# get email address
|
||||||
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||||
|
# excel_reader = ExcelHelper()
|
||||||
|
# mail_list =excel_reader.read_email_pojo(file_name="/Users/lpan/Desktop/hotmail_list.xlsx")
|
||||||
# mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq")
|
# mail_address1 = MailAddress(mail="appointment2022@aol.com", password="gyilpmvyyvlcaviq")
|
||||||
# mail_address1 = MailAddress(mail="chenpeijun@aol.com", password="ytifuwguknzifqyb")
|
# mail_address1 = MailAddress(mail="sayedyepesv@hotmail.com", password="JGc1UH41")
|
||||||
# # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce")
|
# # mail_address3 = MailAddress(mail="ciyuexie@aol.com", password="czezlmmyypokdfce")
|
||||||
# mail_list = [mail_address1]
|
# mail_list = [mail_address1]
|
||||||
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
|
|||||||
@@ -29,7 +29,28 @@ def get_better_list(list):
|
|||||||
# 关键词提取
|
# 关键词提取
|
||||||
|
|
||||||
def read_pinyin_list_from_file() -> list:
|
def read_pinyin_list_from_file() -> list:
|
||||||
file2 = open('clean_list.txt', 'r')
|
file2 = open('all_new_name_list.txt', 'r')
|
||||||
|
lines = file2.readlines()
|
||||||
|
name_list = []
|
||||||
|
count = 0
|
||||||
|
for line in lines:
|
||||||
|
count += 1
|
||||||
|
print("Line{}: {}".format(count, line.strip()))
|
||||||
|
name_list.append(line.strip())
|
||||||
|
return name_list
|
||||||
|
def read_pinyin_first_name_from_file() -> list:
    """Load the first-name list from ``first_name_noDuplicates.txt``.

    The file is opened relative to the current working directory. Every
    line is stripped of surrounding whitespace, echoed to stdout as
    ``Line<N>: <value>`` (N is 1-based) and appended to the result.

    Returns:
        list: the stripped lines, in file order.

    Raises:
        OSError: if the file cannot be opened (propagated from ``open``).
    """
    name_list = []
    # The context manager guarantees the handle is released even if a
    # print or decode error interrupts the loop.
    with open('first_name_noDuplicates.txt', 'r') as name_file:
        for count, line in enumerate(name_file, start=1):
            stripped = line.strip()
            print("Line{}: {}".format(count, stripped))
            name_list.append(stripped)
    return name_list
|
||||||
|
|
||||||
|
def read_pinyin_last_name_from_file() -> list:
|
||||||
|
file2 = open('last_name_noDuplicates.txt', 'r')
|
||||||
lines = file2.readlines()
|
lines = file2.readlines()
|
||||||
name_list = []
|
name_list = []
|
||||||
count = 0
|
count = 0
|
||||||
@@ -40,57 +61,88 @@ def read_pinyin_list_from_file() -> list:
|
|||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
|
|
||||||
|
def generate_new_list_from_old_name_list():
    """Write every last-name / first-name combination to a text file.

    Last names come from ``read_pinyin_last_name_from_file()`` and first
    names from ``read_pinyin_first_name_from_file()``. For each last name,
    every first name is paired with it and written as ``"<last> <first>"``
    on its own line to ``all_new_name_list.txt`` (opened in ``"w"`` mode,
    so any existing content is replaced).
    """
    all_last_name = read_pinyin_last_name_from_file()
    all_first_name = read_pinyin_first_name_from_file()

    # The context manager closes (and flushes) the file even when a write
    # fails part-way through the product.
    with open("all_new_name_list.txt", "w") as out_file:
        out_file.writelines(
            "{} {}\n".format(last_name, first_name)
            for last_name in all_last_name
            for first_name in all_first_name
        )
|
||||||
|
|
||||||
|
def has_numbers(inputString):
    """Return True if any character of *inputString* is a digit."""
    return any(char.isdigit() for char in inputString)


def check_name(word_to_test, pinyin_name_list):
    """Find the first known name that contains *word_to_test*.

    The candidate is rejected outright (``None``) when it is empty,
    contains ``"_"`` or ``"."``, or contains any digit. Otherwise each
    entry of *pinyin_name_list* is split on single spaces; the first token
    is taken as the last name and the final token as the first name. The
    comparison is case-insensitive and succeeds when the candidate is a
    substring of the last name, the first name, ``last + first`` or
    ``first + last``.

    Args:
        word_to_test: candidate string to look up.
        pinyin_name_list: iterable of ``"<last> <first>"`` strings.

    Returns:
        A ``(last_name, first_name)`` tuple for the first matching entry
        (original casing preserved), or ``None`` when nothing matches.
    """
    # An empty string is a substring of everything and would otherwise
    # "match" the first entry of the list.
    if not word_to_test:
        return None
    if "_" in word_to_test or "." in word_to_test:
        return None
    if has_numbers(word_to_test):
        return None

    # Loop-invariant: lowercase the candidate once instead of per branch.
    needle = word_to_test.lower()
    for name in pinyin_name_list:
        parts = name.split(" ")
        last_name = parts[0]
        first_name = parts[-1]
        haystacks = (
            last_name.lower(),
            first_name.lower(),
            (last_name + first_name).lower(),
            (first_name + last_name).lower(),
        )
        if any(needle in haystack for haystack in haystacks):
            return last_name, first_name
    return None
|
||||||
|
|
||||||
|
|
||||||
def generate_name_from_email(mail_address, pinyin_name_list):
|
def generate_name_from_email(mail_address, pinyin_name_list):
|
||||||
# key_words = HanLP.extractKeyword(mail_address, 2)
|
# key_words = HanLP.extractKeyword(mail_address, 2)
|
||||||
# print(key_words)
|
# print(key_words)
|
||||||
# setence = "".join(key_words)
|
# setence = "".join(key_words)
|
||||||
print("generate for " + mail_address)
|
print("generate for " + mail_address)
|
||||||
all_combins = get_ordered_combins(mail_address)
|
all_combins = get_ordered_combins(mail_address)
|
||||||
|
all_combins.sort(key=len, reverse=True)
|
||||||
|
print(all_combins)
|
||||||
|
|
||||||
for i in all_combins:
|
for i in all_combins:
|
||||||
word_to_test = "".join(i)
|
word_to_test = "".join(i)
|
||||||
print("word to test is " + word_to_test)
|
print("word to test is " + word_to_test)
|
||||||
if len(word_to_test) >= 6:
|
# if len(word_to_test) >= 11:
|
||||||
for name in pinyin_name_list:
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
last_name = name.split(" ")[0]
|
# if found_name is not None:
|
||||||
first_name = name.split(" ")[-1]
|
# return found_name
|
||||||
full_name = last_name + first_name
|
# if len(word_to_test) >= 10:
|
||||||
full_name_inverse = first_name + last_name
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
if word_to_test in last_name:
|
# if found_name is not None:
|
||||||
return last_name, first_name
|
# return found_name
|
||||||
elif word_to_test in first_name:
|
# if len(word_to_test) >= 9:
|
||||||
return last_name, first_name
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
elif word_to_test in full_name:
|
# if found_name is not None:
|
||||||
return last_name, first_name
|
# return found_name
|
||||||
elif word_to_test in full_name_inverse:
|
# if len(word_to_test) >= 8:
|
||||||
return last_name, first_name
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
if len(word_to_test) >= 5:
|
# if found_name is not None:
|
||||||
for name in pinyin_name_list:
|
# return found_name
|
||||||
last_name = name.split(" ")[0]
|
# if len(word_to_test) >= 7:
|
||||||
first_name = name.split(" ")[-1]
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
full_name = last_name + first_name
|
# if found_name is not None:
|
||||||
full_name_inverse = first_name + last_name
|
# return found_name
|
||||||
if word_to_test in last_name:
|
# if len(word_to_test) >= 6:
|
||||||
return last_name, first_name
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
elif word_to_test in first_name:
|
# if found_name is not None:
|
||||||
return last_name, first_name
|
# return found_name
|
||||||
elif word_to_test in full_name:
|
# if len(word_to_test) >= 5:
|
||||||
return last_name, first_name
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
elif word_to_test in full_name_inverse:
|
# if found_name is not None:
|
||||||
return last_name, first_name
|
# return found_name
|
||||||
if len(word_to_test) >= 4:
|
if len(word_to_test) >= 4:
|
||||||
for name in pinyin_name_list:
|
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
last_name = name.split(" ")[0]
|
if found_name is not None:
|
||||||
first_name = name.split(" ")[-1]
|
return found_name
|
||||||
full_name = last_name + first_name
|
|
||||||
full_name_inverse = first_name + last_name
|
|
||||||
if word_to_test in last_name:
|
|
||||||
return last_name, first_name
|
|
||||||
elif word_to_test in first_name:
|
|
||||||
return last_name, first_name
|
|
||||||
elif word_to_test in full_name:
|
|
||||||
return last_name, first_name
|
|
||||||
elif word_to_test in full_name_inverse:
|
|
||||||
return last_name, first_name
|
|
||||||
|
|
||||||
# 选择不重复的
|
# 选择不重复的
|
||||||
# if len(pinyin_name_list) > 3:
|
# if len(pinyin_name_list) > 3:
|
||||||
@@ -124,7 +176,7 @@ def write_new_contacts_to_excel(valid_contacts: list):
|
|||||||
workbook.close()
|
workbook.close()
|
||||||
|
|
||||||
|
|
||||||
def find_contact(generate_contacts: list, mail):
|
def find_contact(generate_contacts: list, mail, pinyin_name_list):
|
||||||
contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
|
contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
|
||||||
spliteed = mail.mail.split("@")
|
spliteed = mail.mail.split("@")
|
||||||
possible_name_list = generate_name_from_email(spliteed[0], pinyin_name_list)
|
possible_name_list = generate_name_from_email(spliteed[0], pinyin_name_list)
|
||||||
@@ -135,19 +187,24 @@ def find_contact(generate_contacts: list, mail):
|
|||||||
generate_contacts.append(contact)
|
generate_contacts.append(contact)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def generate_contact_from_mail_list():
|
||||||
db_manager = MongoDbManager()
|
db_manager = MongoDbManager()
|
||||||
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
||||||
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
||||||
# mail_list = db_manager.get_destination_emails()[50:200]
|
mail_list = db_manager.get_destination_emails()[7570:7590]
|
||||||
excel_reader = ExcelHelper()
|
# excel_reader = ExcelHelper()
|
||||||
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx")
|
# mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx")
|
||||||
generate_contacts = []
|
generate_contacts = []
|
||||||
pinyin_name_list = read_pinyin_list_from_file()
|
pinyin_name_list = read_pinyin_list_from_file()
|
||||||
random.shuffle(pinyin_name_list)
|
random.shuffle(pinyin_name_list)
|
||||||
with ThreadPoolExecutor(max_workers=200) as executor:
|
with ThreadPoolExecutor(max_workers=500) as executor:
|
||||||
for mail in mail_list:
|
for mail in mail_list:
|
||||||
executor.submit(find_contact, generate_contacts, mail)
|
executor.submit(find_contact, generate_contacts, mail, pinyin_name_list)
|
||||||
# for mail in mail_list:
|
# for mail in mail_list:
|
||||||
# find_contact(generate_contacts, mail)
|
# find_contact(generate_contacts, mail)
|
||||||
write_new_contacts_to_excel(generate_contacts)
|
write_new_contacts_to_excel(generate_contacts)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
generate_contact_from_mail_list()
|
||||||
|
# generate_new_list_from_old_name_list()
|
||||||
|
|||||||
@@ -24,10 +24,9 @@ DOUBLE_REQUEST_ERROR_MESSAGE_FR = "Une demande avec les données saisies a déj
|
|||||||
class LinkValidator:
|
class LinkValidator:
|
||||||
tls = TlsPlaywright()
|
tls = TlsPlaywright()
|
||||||
|
|
||||||
def __init__(self, link: str, proxy_type=ProxyType.RESIDENTIAL, headless=False):
|
def __init__(self, link: str, headless=False):
|
||||||
self.is_finished = False
|
self.is_finished = False
|
||||||
self.link = link
|
self.link = link
|
||||||
self.proxy_type = proxy_type
|
|
||||||
self.is_event_sent = False
|
self.is_event_sent = False
|
||||||
self.is_captcha_in_error = False
|
self.is_captcha_in_error = False
|
||||||
self.is_filling_fields = False
|
self.is_filling_fields = False
|
||||||
@@ -50,10 +49,8 @@ class LinkValidator:
|
|||||||
def _run(self, proxy):
|
def _run(self, proxy):
|
||||||
self.logger.info("will start browser")
|
self.logger.info("will start browser")
|
||||||
# reset otp_value to None
|
# reset otp_value to None
|
||||||
devices = random.choice(params.DEVICES)
|
|
||||||
first_page = None
|
|
||||||
# while first_page is None:
|
# while first_page is None:
|
||||||
self.start_browser(proxy, self.tls.playwright, devices)
|
self.start_browser(self.tls.playwright)
|
||||||
# proxy = params.get_proxy(self.proxy_type)
|
# proxy = params.get_proxy(self.proxy_type)
|
||||||
otp_input = self.page.locator(OTP_FIELD_ID)
|
otp_input = self.page.locator(OTP_FIELD_ID)
|
||||||
otp_input.wait_for(state='visible', timeout=TIME_OUT)
|
otp_input.wait_for(state='visible', timeout=TIME_OUT)
|
||||||
|
|||||||
Reference in New Issue
Block a user