"""Utilities to build, clean, merge and export contact lists as Excel files.

Contacts are read from / written to ``.xlsx`` files and looked up through
``MONGO_STORE_MANAGER``. Every exported workbook is named
``contact_list_<file_name>.xlsx`` and is created in the current working
directory.
"""
import datetime
import random
from pathlib import Path

import xlsxwriter

from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.person_name.extract_name_with_pinyinlist import filter_already_validated_contacts, read_pinyin_list_from_file
from src.pojo.contact_pojo import ContactPojo
from src.utils.contacts.generate_random_passport_id import generate_single_titre_sejour_number, \
    get_random_passport_id_number
from src.utils.excel_reader import read_contacts, fr_phone_number_prefix, get_random_fr_phone_numbers, ExcelHelper

# from src.person_name.cython_extract_methods import filter_already_validated_contacts, read_pinyin_list_from_file

# Placeholder serial values; a contact carrying one of them gets its serial
# replaced by the one stored for its mail address (see _restore_real_serials).
DEFAULT_SERIAL_TO_IGNORE = ["47e7e36b", "bitbrowser"]


def _has_text(value):
    """Return True when *value* is neither None nor empty."""
    return value is not None and len(value) > 0


def _split_full_name(full_name):
    """Split ``"<last> <first>"`` into ``(last_name, first_name)``.

    Non-breaking spaces are treated as regular spaces. Only the first two
    space-separated tokens are used; when there is a single token the first
    name is returned as an empty string instead of raising IndexError.
    """
    parts = full_name.replace("\xa0", " ").split(" ")
    first_name = parts[1] if len(parts) > 1 else ""
    return parts[0], first_name


def _identity_key(contact):
    """Return the tuple of fields used to detect duplicated contacts."""
    return (contact.mail, contact.phone, contact.passport,
            contact.last_name, contact.first_name)


def _index_by_mail(contacts):
    """Map mail -> contact; if a mail occurs several times the last one wins."""
    return {contact.mail: contact for contact in contacts}


def _copy_identity_fields(source, target):
    """Copy name, phone and passport from *source* onto *target*."""
    target.last_name = source.last_name
    target.first_name = source.first_name
    target.phone = source.phone
    target.passport = source.passport


def _restore_real_serials(contacts):
    """Replace placeholder serials by the serial stored for the same mail.

    Only contacts whose serial is listed in ``DEFAULT_SERIAL_TO_IGNORE`` are
    touched. When several stored entries share a mail, the first one is used.
    """
    serial_by_mail = {}
    for entry in MONGO_STORE_MANAGER.get_all_contact_serial_list():
        serial_by_mail.setdefault(entry.mail, entry.serial)
    for contact in contacts:
        if contact.serial in DEFAULT_SERIAL_TO_IGNORE and contact.mail in serial_by_mail:
            contact.serial = serial_by_mail[contact.mail]


def upload_contacts_list():
    """Read and return the contacts of ``~/Desktop/contact_list_all.xlsx``."""
    contacts_file = Path.home() / "Desktop" / "contact_list_all.xlsx"
    return read_contacts(str(contacts_file))


def fix_phone_number_format(file_path):
    """Repair names and phone numbers of the contacts stored in *file_path*.

    * A contact without first name has its ``last_name`` split on the first
      space into last name / first name.
    * A phone number starting with ``'7'`` whose first two characters are not
      in ``fr_phone_number_prefix`` is replaced by a random French number.

    The result is written to ``contact_list_15_05_to_test.xlsx``.
    """
    contacts = read_contacts(file_path)
    for contact in contacts:
        if not _has_text(contact.first_name):
            print(contact)
            # Nothing to split when the last name is missing as well.
            if _has_text(contact.last_name):
                contact.last_name, contact.first_name = _split_full_name(contact.last_name)
            print(contact)
        if contact.phone.startswith('7'):
            if contact.phone[0:2] not in fr_phone_number_prefix:
                print(contact)
                contact.phone = get_random_fr_phone_numbers()
    write_new_contacts_to_excel(contacts, file_name="15_05_to_test")


def generate_contact_from_mail_list(mail_list_file, name_list_file_path="all_new_name_list.txt"):
    """Create one random contact per mail of *mail_list_file* and export them.

    Each contact gets a random French phone number, passport number, resident
    card number and a name picked at random from *name_list_file_path*
    (entries are expected to look like ``"<last> <first>"``). The contacts are
    written to ``contact_list_<today>.xlsx``.
    """
    excel_reader = ExcelHelper()
    mail_list = excel_reader.read_mails_and_pwd(mail_list_file)
    print("mail_list size is {}".format(len(mail_list)))
    # NOTE(review): the return value is ignored, so this presumably filters
    # mail_list in place -- confirm against filter_already_validated_contacts.
    filter_already_validated_contacts(mail_list)
    print("mail_list size after filter is {}".format(len(mail_list)))

    pinyin_name_list = read_pinyin_list_from_file(name_list_file_path)
    random.shuffle(pinyin_name_list)
    print(pinyin_name_list[0])

    generated_contacts = []
    for mail in mail_list:
        phone_number = get_random_fr_phone_numbers()
        passport_number = get_random_passport_id_number()
        resident_card_number = generate_single_titre_sejour_number()
        last_name, first_name = _split_full_name(random.choice(pinyin_name_list))
        contact = ContactPojo(mail=mail.mail, phone_number=phone_number, passport=passport_number,
                              last_name=last_name, first_name=first_name, store="random")
        contact.resident_card_number = resident_card_number
        generated_contacts.append(contact)
    write_new_contacts_to_excel(generated_contacts)


def write_new_contacts_to_excel(valid_contacts: list, file_name=None):
    """Write *valid_contacts* to ``contact_list_<file_name>.xlsx``.

    :param valid_contacts: contacts to export, one row each after a bold
        header row.
    :param file_name: suffix of the workbook name; defaults to today's date,
        resolved at call time (not at import time).
    """
    if file_name is None:
        file_name = str(datetime.date.today())
    header_data = ['name', 'phone', 'passport', 'email', 'store', 'serial', 'ip_country', 'ua',
                   'resident_card_number']
    # The context manager closes (and therefore saves) the workbook even when
    # writing a row raises.
    with xlsxwriter.Workbook('contact_list_{}.xlsx'.format(file_name)) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})
        for col_num, title in enumerate(header_data):
            worksheet.write(0, col_num, title, header_format)
        for row, info in enumerate(valid_contacts, start=1):
            row_values = (
                "{} {}".format(info.last_name, info.first_name),
                info.phone,
                info.passport,
                info.mail,
                info.store,
                info.serial,
                info.ip_country,
                info.ua,
                info.resident_card_number,
            )
            for col, value in enumerate(row_values):
                worksheet.write(row, col, value)


def generate_valid_contact_list_for_day(segment_number=1, collection_name="2025-03-08"):
    """Export the validated contacts of one day, whole and split in segments.

    Every successful item of *collection_name* is completed with the name,
    phone, passport and resident card number of the contact-to-book sharing
    its mail. Only items with ``url_validated`` set and a non-empty last name
    are kept, without duplicates. Placeholder serials are then replaced by the
    stored ones.

    :param segment_number: number of additional ``<collection_name>_<i>``
        files the list is split into.
    :param collection_name: day (collection) to export.
    """
    validated_items = MONGO_STORE_MANAGER.get_all_successful_items_for_one_day(collection_name)
    reference_by_mail = _index_by_mail(MONGO_STORE_MANAGER.get_all_contacts_to_book())

    contacts_to_save = []
    seen_keys = set()
    for contact in validated_items:
        contact.store = "random"
        reference = reference_by_mail.get(contact.mail)
        if reference is not None:
            _copy_identity_fields(reference, contact)
            contact.resident_card_number = reference.resident_card_number
        if not contact.url_validated or not _has_text(contact.last_name):
            continue
        # Keep only the first occurrence of each identity.
        key = _identity_key(contact)
        if key not in seen_keys:
            seen_keys.add(key)
            contacts_to_save.append(contact)

    _restore_real_serials(contacts_to_save)
    write_new_contacts_to_excel(contacts_to_save, file_name=collection_name)
    write_list_with_segment_number(collection_name, contacts_to_save, segment_number)


def write_list_with_segment_number(file_name, _contact_to_save_list, segment_number):
    """Split the list into *segment_number* consecutive files.

    Segment ``i`` (1-based) is written to ``contact_list_<file_name>_<i>.xlsx``.
    Segments have ``len(list) // segment_number`` contacts; the last one also
    receives the remainder so that no contact is dropped. Nothing is written
    when *segment_number* is not positive.
    """
    if segment_number <= 0:
        return
    total = len(_contact_to_save_list)
    step = total // segment_number
    for index in range(segment_number):
        start = index * step
        end = total if index == segment_number - 1 else start + step
        write_new_contacts_to_excel(_contact_to_save_list[start:end],
                                    file_name=file_name + "_" + str(index + 1))


def merge_contact_list_files(file_list: list, final_file_name="merged_contact_list"):
    """Merge several contact workbooks into ``contact_list_<final_file_name>.xlsx``.

    All contacts get the store ``"random"``, placeholder serials are replaced
    by the stored ones, and duplicates are removed with ``set`` (i.e. using
    the contact objects' own equality/hash; the output order is not stable).
    """
    all_contacts = []
    for file in file_list:
        all_contacts.extend(read_contacts(file))
    for contact in all_contacts:
        contact.store = "random"
    # The serial must be tested for membership in the placeholder list;
    # comparing it to the list itself never matches.
    _restore_real_serials(all_contacts)
    print(len(all_contacts))
    contacts_without_duplicate = list(set(all_contacts))
    print(len(contacts_without_duplicate))
    write_new_contacts_to_excel(contacts_without_duplicate, file_name=final_file_name)


def generate_all_contact_list():
    """Export every contact-to-book, shuffled, to ``all`` and ``all_1``."""
    all_contacts = MONGO_STORE_MANAGER.get_all_contacts_to_book()
    random.shuffle(all_contacts)
    for contact in all_contacts:
        contact.store = "random"
    write_new_contacts_to_excel(all_contacts, file_name="all")
    write_list_with_segment_number("all", all_contacts, 1)


def write_to_black_list(contacts: list):
    """Insert every contact of *contacts* into the blacklist."""
    for contact in contacts:
        MONGO_STORE_MANAGER.insert_blacklist_contact(contact)


def update_contact_list_not_received_mail():
    """Blacklist every successful item whose URL was not validated."""
    # NOTE(review): the other functions call
    # get_all_successful_items_for_one_day(<day>); confirm that this
    # argument-less variant exists on the store manager.
    contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
    for contact in contacts:
        if not contact.url_validated:
            write_to_black_list([contact])


def get_old_validated_contact_list(remove_blacklisted_contact=True, _day_in_str="2024-09-06"):
    """Export the validated contacts of a past day.

    Items without last name are completed (name, phone, passport) from the
    contact-to-book sharing their mail. Items are kept when ``url_validated``
    is set, the last name is not empty, they are not duplicates and their mail
    domain is not one of the excluded domains.

    The kept contacts are written to ``contact_list_<day>.xlsx``. When
    *remove_blacklisted_contact* is true, a second file
    ``contact_list_<day>_without_blacklisted.xlsx`` is written without the
    blacklisted mails.
    """
    validated_items = MONGO_STORE_MANAGER.get_all_successful_items_for_one_day(_day_in_str)
    domains_to_remove = ["firemail.de", "onet.pl", "yahoo.com", "gmx.com"]
    reference_by_mail = _index_by_mail(MONGO_STORE_MANAGER.get_all_contacts_to_book())

    contacts_to_save = []
    seen_keys = set()
    for contact in validated_items:
        contact.store = "random"
        if not _has_text(contact.last_name):
            reference = reference_by_mail.get(contact.mail)
            if reference is not None:
                _copy_identity_fields(reference, contact)
        if not contact.url_validated or not _has_text(contact.last_name):
            continue
        key = _identity_key(contact)
        if key in seen_keys:
            continue
        # Drop the contact when its mail domain is one of the excluded ones.
        if contact.mail.split("@")[1] not in domains_to_remove:
            seen_keys.add(key)
            contacts_to_save.append(contact)

    if remove_blacklisted_contact:
        # Build the lookup once instead of once per contact.
        blacklisted_mails = {bl.mail for bl in MONGO_STORE_MANAGER.get_blacklist_contacts()}
        contacts_without_blacklisted = [contact for contact in contacts_to_save
                                        if contact.mail not in blacklisted_mails]
        write_new_contacts_to_excel(contacts_without_blacklisted,
                                    file_name=_day_in_str + "_without_blacklisted")
    write_new_contacts_to_excel(contacts_to_save, file_name=_day_in_str)


# Save the new contacts online
if __name__ == '__main__':
    # contacts_to_book = upload_contacts_list()
    # MONGO_STORE_MANAGER.upload_contact_list(contacts_to_book)
    # print("start at {}".format(datetime.datetime.now()))
    # generate_valid_contact_list_for_day(segment_number=2)
    # generate_contact_from_mail_list("/Users/lpan/Downloads/邮箱及密码_aol_400_08_03_2025.xlsx")
    # print("end at {}".format(datetime.datetime.now()))
    # update_contact_list_not_received_mail()
    # get_old_validated_contact_list()
    # print("end at {}".format(datetime.datetime.now()))
    generate_all_contact_list()
    # merge_contact_list_files(
    #     ["/Users/lpan/Desktop/contact_list_2024-11-04.xlsx",
    #      "/Users/lpan/Desktop/contact_list_2024-11-07.xlsx",
    #      "/Users/lpan/Desktop/contact_list_2024-11-08.xlsx",
    #      "/Users/lpan/Desktop/contact_list_2024-11-06.xlsx"
    #      ])
    # fix_phone_number_format("/Users/lpan/Desktop/15_05_to_test.xlsx")