From 32748762d6ed0612053b294b0ae447b239b43f98 Mon Sep 17 00:00:00 2001
From: Lei PAN
Date: Wed, 20 Mar 2024 21:03:32 +0100
Subject: [PATCH] optimization for contact extraction

---
 src/person_name/extract_name_with_pinyinlist.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/person_name/extract_name_with_pinyinlist.py b/src/person_name/extract_name_with_pinyinlist.py
index 87d187a..41786eb 100755
--- a/src/person_name/extract_name_with_pinyinlist.py
+++ b/src/person_name/extract_name_with_pinyinlist.py
@@ -4,7 +4,7 @@ from concurrent.futures import ThreadPoolExecutor
 
 import xlsxwriter
 
-from src.db.mongo_manager import MongoDbManager
+from src.db.mongo_manager import MongoDbManager, MONGO_STORE_MANAGER
 from src.pojo.contact_pojo import ContactPojo
 from src.utils.excel_reader import get_random_fr_phone_numbers, ExcelHelper
 from src.utils.contacts.generate_random_passport_id import get_random_passport_id_number
@@ -224,7 +224,9 @@ def generate_contact_from_mail_list(_start_position, _end_position):
     # mail_list = db_manager.get_destination_emails()[8501:8520]
     # mail_list = db_manager.get_destination_emails()[8521:8600]
     mail_list = db_manager.get_destination_emails()[_start_position:_end_position]
-    print("mail_list size is {}".format(len(mail_list)))
+    print("mail_list size before filter is {}".format(len(mail_list)))
+    filter_already_validated_contacts(mail_list)
+    print("mail_list size after filter is {}".format(len(mail_list)))
     # mail_list = db_manager.get_destination_emails()[9323:9914]
     # excel_reader = ExcelHelper()
     # mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/toExtract.xlsx")
@@ -239,6 +241,27 @@ def generate_contact_from_mail_list(_start_position, _end_position):
     write_new_contacts_to_excel(generate_contacts)
 
 
+def filter_already_validated_contacts(_list_to_extract):
+    """Drop, in place, contacts whose mail is already validated.
+
+    The survivors are slice-assigned back into the same list: the caller
+    keeps using its own reference, and calling list.remove() while
+    iterating that list would skip the element after each removal.
+
+    :param _list_to_extract: contacts to filter; each needs a ``mail``.
+    """
+    # NOTE(review): assumes ``mail`` values are hashable (e.g. str) - confirm.
+    _validated_mails = {
+        _contact.mail
+        for _contact in MONGO_STORE_MANAGER.get_all_contacts_to_book()
+    }
+    _list_to_extract[:] = [
+        _contact
+        for _contact in _list_to_extract
+        if _contact.mail not in _validated_mails
+    ]
+
+
 if __name__ == '__main__':
     start_position = 14500
     end_position = 15084