Add a note column to the Excel export

This commit is contained in:
2023-04-18 20:23:14 +02:00
parent a3c06b225c
commit 7bbdf6ca9c
2 changed files with 25 additions and 16 deletions
+2
View File
@@ -12,6 +12,7 @@ class ContactPojo:
mail: str mail: str
ccid: str ccid: str
position: int position: int
note: str
def __init__(self, phone_number: str, passport_number: str, last_name: str, first_name: str, mail: str, def __init__(self, phone_number: str, passport_number: str, last_name: str, first_name: str, mail: str,
ccid: str = "", ccid: str = "",
@@ -23,6 +24,7 @@ class ContactPojo:
self.ccid = ccid self.ccid = ccid
self.mail = mail self.mail = mail
self.position = position self.position = position
self.note = ""
def to_firestore_dict(self): def to_firestore_dict(self):
dest = { dest = {
+23 -16
View File
@@ -21,10 +21,11 @@ def get_ordered_combins(stuff):
def get_better_list(list): def get_better_list(list):
for name in list: return list
if len(name) == 2: # for name in list:
list.remove(name) # if len(name) == 2:
return list # list.remove(name)
# return list
# 关键词提取 # 关键词提取
@@ -34,7 +35,8 @@ def generate_name_from_email(mail_address):
key_words = HanLP.extractKeyword(mail_address, 2) key_words = HanLP.extractKeyword(mail_address, 2)
print(key_words) print(key_words)
hmmparams = DefaultHmmParams() hmmparams = DefaultHmmParams()
possible_name_list = [] pinyin_name_list = []
chinese_name_list = []
setence = "".join(key_words) setence = "".join(key_words)
all_combins = get_ordered_combins(setence) all_combins = get_ordered_combins(setence)
for i in all_combins: for i in all_combins:
@@ -45,19 +47,20 @@ def generate_name_from_email(mail_address):
result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2) result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2)
for item in result: for item in result:
print(item.score, item.path) print(item.score, item.path)
chinese_name_list.extend(item.path)
# print("word is " + word_to_test) # print("word is " + word_to_test)
if len(word_to_test) >= 3: # if len(word_to_test) >= 3:
all_combins.remove(i) # all_combins.remove(i)
possible_name_list.append(word_to_test) pinyin_name_list.append(word_to_test)
except Exception as error: except Exception as error:
print(error) print(error)
print(possible_name_list) print(pinyin_name_list)
# 选择不重复的 # 选择不重复的
if len(possible_name_list) > 3: # if len(pinyin_name_list) > 3:
return get_better_list(possible_name_list) # return get_better_list(pinyin_name_list)
else: # else:
return possible_name_list return pinyin_name_list, chinese_name_list
def write_new_contacts_to_excel(valid_contacts: list): def write_new_contacts_to_excel(valid_contacts: list):
@@ -65,7 +68,7 @@ def write_new_contacts_to_excel(valid_contacts: list):
col = 0 col = 0
# Create a workbook and add a worksheet. # Create a workbook and add a worksheet.
workbook = xlsxwriter.Workbook('real_name_contacts_{}.xlsx'.format(len(valid_contacts))) workbook = xlsxwriter.Workbook('real_name_contacts_{}.xlsx'.format(len(valid_contacts)))
header_data = ['name', 'phone', 'passport', 'email'] header_data = ['name', 'phone', 'passport', 'email', 'note']
worksheet = workbook.add_worksheet() worksheet = workbook.add_worksheet()
header_format = workbook.add_format({'bold': True}) header_format = workbook.add_format({'bold': True})
@@ -80,21 +83,25 @@ def write_new_contacts_to_excel(valid_contacts: list):
worksheet.write(row, col + 1, info.phone) worksheet.write(row, col + 1, info.phone)
worksheet.write(row, col + 2, info.passport) worksheet.write(row, col + 2, info.passport)
worksheet.write(row, col + 3, info.mail) worksheet.write(row, col + 3, info.mail)
worksheet.write(row, col + 4, info.note)
row += 1 row += 1
workbook.close() workbook.close()
if __name__ == '__main__': if __name__ == '__main__':
db_manager = MongoDbManager() db_manager = MongoDbManager()
mail_list = db_manager.get_destination_emails()[50:200] mail_list = db_manager.get_destination_emails()[301:500]
# mail_list = db_manager.get_destination_emails()[50:200]
generate_contacts = [] generate_contacts = []
for mail in mail_list: for mail in mail_list:
contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="") contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
spliteed = mail.mail.split("@") spliteed = mail.mail.split("@")
possible_name_list = generate_name_from_email(spliteed[0]) possible_name_list = generate_name_from_email(spliteed[0])[0]
chinese_name_list = generate_name_from_email(spliteed[0])[1]
if len(possible_name_list) >= 2: if len(possible_name_list) >= 2:
contact.last_name = possible_name_list[0] contact.last_name = possible_name_list[0]
contact.first_name = "".join(possible_name_list[1:-1]) contact.first_name = "".join(possible_name_list[1:-1])
contact.note = " ".join(chinese_name_list)
if len(contact.last_name) > 0 and len(contact.first_name) > 0: if len(contact.last_name) > 0 and len(contact.first_name) > 0:
generate_contacts.append(contact) generate_contacts.append(contact)
write_new_contacts_to_excel(generate_contacts) write_new_contacts_to_excel(generate_contacts)