From 14769e13ffd66c12839146c2e5f4b70cfdd28bd0 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Wed, 8 Nov 2023 17:34:57 +0100 Subject: [PATCH] avoid v char --- .../extract_name_with_pinyinlist.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/person_name/extract_name_with_pinyinlist.py b/src/person_name/extract_name_with_pinyinlist.py index bc99c38..047acef 100644 --- a/src/person_name/extract_name_with_pinyinlist.py +++ b/src/person_name/extract_name_with_pinyinlist.py @@ -6,6 +6,7 @@ import xlsxwriter from src.db.mongo_manager import MongoDbManager from src.pojo.contact_pojo import ContactPojo +from src.utils import excel_reader from src.utils.excel_reader import get_random_fr_phone_numbers, ExcelHelper from src.utils.generate_random_passport_id import get_random_passport_id_number @@ -85,7 +86,7 @@ def has_numbers(inputString): def check_name(word_to_test, pinyin_name_list): - if "_" in word_to_test or "." in word_to_test: + if "_" in word_to_test or "." in word_to_test or "v" in word_to_test: return None if has_numbers(word_to_test): return None @@ -149,7 +150,10 @@ def generate_name_from_email(mail_address, pinyin_name_list): # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) # if found_name is not None: # return found_name - if len(word_to_test) >= 1: + # if len(word_to_test) >= 7: + # if len(word_to_test) >= 6: + if len(word_to_test) >= 5: + # if len(word_to_test) >= 4: found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) if found_name is not None: return found_name @@ -199,11 +203,14 @@ def find_contact(generate_contacts: list, mail, pinyin_name_list): def generate_contact_from_mail_list(): db_manager = MongoDbManager() - # mail_list = db_manager.get_destination_emails()[6000:7000] - # mail_list = db_manager.get_destination_emails()[3001:3200] - # mail_list = db_manager.get_destination_emails()[7570:7590] - excel_reader = ExcelHelper() - mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/gmail_19_05.xlsx") + # mail_list = db_manager.get_destination_emails()[1:500] + # mail_list = db_manager.get_destination_emails()[701:900] + # mail_list = db_manager.get_destination_emails()[901:1100] + # mail_list = db_manager.get_destination_emails()[3201:3400] + mail_list = db_manager.get_destination_emails()[3401:3600] + # mail_list = db_manager.get_destination_emails()[9323:9914] + # excel_reader = ExcelHelper() + # mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/toExtract.xlsx") generate_contacts = [] pinyin_name_list = read_pinyin_list_from_file() random.shuffle(pinyin_name_list)