From c1bf623640493e7e0ff737189fdb1ac9fc4bd161 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Sat, 14 Dec 2024 11:45:06 +0100 Subject: [PATCH] optimize merge contacts method --- src/person_name/contact_manager.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/person_name/contact_manager.py b/src/person_name/contact_manager.py index bdd337a..1ea76f9 100755 --- a/src/person_name/contact_manager.py +++ b/src/person_name/contact_manager.py @@ -3,13 +3,15 @@ import random from pathlib import Path import xlsxwriter -# from src.person_name.cython_extract_methods import filter_already_validated_contacts, read_pinyin_list_from_file from src.db.mongo_manager import MONGO_STORE_MANAGER from src.pojo.contact_pojo import ContactPojo from src.utils.contacts.generate_random_passport_id import get_random_passport_id_number from src.utils.excel_reader import read_contacts, fr_phone_number_prefix, get_random_fr_phone_numbers, ExcelHelper +# from src.person_name.cython_extract_methods import filter_already_validated_contacts, read_pinyin_list_from_file + + DEFAULT_SERIAL = "47e7e36b" @@ -86,7 +88,7 @@ def write_new_contacts_to_excel(valid_contacts: list, file_name=str(datetime.dat def generate_valid_contact_list_for_day(segment_number=1): - _collection_name = "2024-12-07" + _collection_name = "2024-12-13" _valid_contact_list = MONGO_STORE_MANAGER.get_all_successful_items_for_one_day(_collection_name) _all_contacts = MONGO_STORE_MANAGER.get_all_contacts_to_book() _contact_to_save = [] @@ -101,8 +103,8 @@ def generate_valid_contact_list_for_day(segment_number=1): _contact.passport = _true_contact.passport _contact.first_name = _true_contact.first_name - if len(_contact.serial) == 0: - _contact.serial = DEFAULT_SERIAL + # if len(_contact.serial) == 0: + # _contact.serial = DEFAULT_SERIAL if _contact.url_validated: if _contact.last_name is not None and len(_contact.last_name) > 0: _need_to_save = True @@ -132,6 +134,13 @@ def merge_contact_list_files(file_list: list, final_file_name="merged_contact_li _all_contact_list.extend(read_contacts(file)) for _con in _all_contact_list: _con.store = "random" + _contact_serial_map = MONGO_STORE_MANAGER.get_all_contact_serial_list() + for contact in _all_contact_list: + if contact.serial == DEFAULT_SERIAL: + for _contact_serial in _contact_serial_map: + if _contact_serial.mail == contact.mail: + contact.serial = _contact_serial.serial + break print(len(_all_contact_list)) _list_without_duplicate = list(set(_all_contact_list)) print(len(_list_without_duplicate)) @@ -204,7 +213,7 @@ if __name__ == '__main__': # contacts_to_book = upload_contacts_list() # MONGO_STORE_MANAGER.upload_contact_list(contacts_to_book) # print("start at {}".format(datetime.datetime.now())) - generate_valid_contact_list_for_day(segment_number=2) + # generate_valid_contact_list_for_day(segment_number=2) # generate_contact_from_mail_list("/Users/lpan/Downloads/邮箱及密码.xlsx") # print("end at {}".format(datetime.datetime.now())) # update_contact_list_not_received_mail() @@ -212,7 +221,10 @@ if __name__ == '__main__': # get_old_validated_contact_list() # print("end at {}".format(datetime.datetime.now())) # generate_all_contact_list() - # merge_contact_list_files( - # ["/Users/lpan/Desktop/contact_list_all_old_not_used_contact.xlsx", - # "/Users/lpan/Desktop/contact_list_2024-06-26.xlsx"]) + merge_contact_list_files( + ["/Users/lpan/Desktop/contact_list_2024-11-04.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-07.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-08.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-06.xlsx" + ]) # fix_phone_number_format("/Users/lpan/Desktop/15_05_to_test.xlsx")