From 00794269f503347a804ca42cd84f89a80374fc21 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Mon, 8 May 2023 23:05:04 +0200 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E5=8F=96=E5=90=8D=E5=AD=97=E6=97=B6?= =?UTF-8?q?=EF=BC=8C=E4=BE=9D=E6=AC=A1=E6=8F=90=E5=8F=966=EF=BC=8C5?= =?UTF-8?q?=EF=BC=8C4=E4=B8=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../extract_name_with_pinyinlist.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/src/person_name/extract_name_with_pinyinlist.py b/src/person_name/extract_name_with_pinyinlist.py index 908aef4..52d4e66 100644 --- a/src/person_name/extract_name_with_pinyinlist.py +++ b/src/person_name/extract_name_with_pinyinlist.py @@ -6,7 +6,7 @@ import xlsxwriter from src.db.mongo_manager import MongoDbManager from src.pojo.contact_pojo import ContactPojo -from src.utils.excel_reader import get_random_phone_numbers +from src.utils.excel_reader import get_random_phone_numbers, ExcelHelper from src.utils.generate_random_passport_id import get_random_passport_id_number @@ -48,8 +48,36 @@ def generate_name_from_email(mail_address, pinyin_name_list): all_combins = get_ordered_combins(mail_address) for i in all_combins: word_to_test = "".join(i) + print("word to test is " + word_to_test) + if len(word_to_test) >= 6: + for name in pinyin_name_list: + last_name = name.split(" ")[0] + first_name = name.split(" ")[-1] + full_name = last_name + first_name + full_name_inverse = first_name + last_name + if word_to_test in last_name: + return last_name, first_name + elif word_to_test in first_name: + return last_name, first_name + elif word_to_test in full_name: + return last_name, first_name + elif word_to_test in full_name_inverse: + return last_name, first_name if len(word_to_test) >= 5: - # print("word to test is " + word_to_test) + for name in pinyin_name_list: + last_name = name.split(" ")[0] + first_name = name.split(" ")[-1] + full_name = last_name + first_name + full_name_inverse = first_name + last_name + if word_to_test in last_name: + return last_name, first_name + elif word_to_test in first_name: + return last_name, first_name + elif word_to_test in full_name: + return last_name, first_name + elif word_to_test in full_name_inverse: + return last_name, first_name + if len(word_to_test) >= 4: for name in pinyin_name_list: last_name = name.split(" ")[0] first_name = name.split(" ")[-1] @@ -109,9 +137,11 @@ def find_contact(generate_contacts: list, mail): if __name__ == '__main__': db_manager = MongoDbManager() - mail_list = db_manager.get_destination_emails()[5000:6000] + # mail_list = db_manager.get_destination_emails()[6000:7000] # mail_list = db_manager.get_destination_emails()[3001:3200] # mail_list = db_manager.get_destination_emails()[50:200] + excel_reader = ExcelHelper() + mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx") generate_contacts = [] pinyin_name_list = read_pinyin_list_from_file() random.shuffle(pinyin_name_list)