Add a note column to the Excel export
This commit is contained in:
@@ -12,6 +12,7 @@ class ContactPojo:
|
||||
mail: str
|
||||
ccid: str
|
||||
position: int
|
||||
note: str
|
||||
|
||||
def __init__(self, phone_number: str, passport_number: str, last_name: str, first_name: str, mail: str,
|
||||
ccid: str = "",
|
||||
@@ -23,6 +24,7 @@ class ContactPojo:
|
||||
self.ccid = ccid
|
||||
self.mail = mail
|
||||
self.position = position
|
||||
self.note = ""
|
||||
|
||||
def to_firestore_dict(self):
|
||||
dest = {
|
||||
|
||||
+22
-15
@@ -21,10 +21,11 @@ def get_ordered_combins(stuff):
|
||||
|
||||
|
||||
def get_better_list(list):
|
||||
for name in list:
|
||||
if len(name) == 2:
|
||||
list.remove(name)
|
||||
return list
|
||||
# for name in list:
|
||||
# if len(name) == 2:
|
||||
# list.remove(name)
|
||||
# return list
|
||||
|
||||
|
||||
# 关键词提取
|
||||
@@ -34,7 +35,8 @@ def generate_name_from_email(mail_address):
|
||||
key_words = HanLP.extractKeyword(mail_address, 2)
|
||||
print(key_words)
|
||||
hmmparams = DefaultHmmParams()
|
||||
possible_name_list = []
|
||||
pinyin_name_list = []
|
||||
chinese_name_list = []
|
||||
setence = "".join(key_words)
|
||||
all_combins = get_ordered_combins(setence)
|
||||
for i in all_combins:
|
||||
@@ -45,19 +47,20 @@ def generate_name_from_email(mail_address):
|
||||
result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2)
|
||||
for item in result:
|
||||
print(item.score, item.path)
|
||||
chinese_name_list.extend(item.path)
|
||||
# print("word is " + word_to_test)
|
||||
if len(word_to_test) >= 3:
|
||||
all_combins.remove(i)
|
||||
possible_name_list.append(word_to_test)
|
||||
# if len(word_to_test) >= 3:
|
||||
# all_combins.remove(i)
|
||||
pinyin_name_list.append(word_to_test)
|
||||
except Exception as error:
|
||||
print(error)
|
||||
|
||||
print(possible_name_list)
|
||||
print(pinyin_name_list)
|
||||
# 选择不重复的
|
||||
if len(possible_name_list) > 3:
|
||||
return get_better_list(possible_name_list)
|
||||
else:
|
||||
return possible_name_list
|
||||
# if len(pinyin_name_list) > 3:
|
||||
# return get_better_list(pinyin_name_list)
|
||||
# else:
|
||||
return pinyin_name_list, chinese_name_list
|
||||
|
||||
|
||||
def write_new_contacts_to_excel(valid_contacts: list):
|
||||
@@ -65,7 +68,7 @@ def write_new_contacts_to_excel(valid_contacts: list):
|
||||
col = 0
|
||||
# Create a workbook and add a worksheet.
|
||||
workbook = xlsxwriter.Workbook('real_name_contacts_{}.xlsx'.format(len(valid_contacts)))
|
||||
header_data = ['name', 'phone', 'passport', 'email']
|
||||
header_data = ['name', 'phone', 'passport', 'email', 'note']
|
||||
worksheet = workbook.add_worksheet()
|
||||
header_format = workbook.add_format({'bold': True})
|
||||
|
||||
@@ -80,21 +83,25 @@ def write_new_contacts_to_excel(valid_contacts: list):
|
||||
worksheet.write(row, col + 1, info.phone)
|
||||
worksheet.write(row, col + 2, info.passport)
|
||||
worksheet.write(row, col + 3, info.mail)
|
||||
worksheet.write(row, col + 4, info.note)
|
||||
row += 1
|
||||
workbook.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
db_manager = MongoDbManager()
|
||||
mail_list = db_manager.get_destination_emails()[50:200]
|
||||
mail_list = db_manager.get_destination_emails()[301:500]
|
||||
# mail_list = db_manager.get_destination_emails()[50:200]
|
||||
generate_contacts = []
|
||||
for mail in mail_list:
|
||||
contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
|
||||
spliteed = mail.mail.split("@")
|
||||
possible_name_list = generate_name_from_email(spliteed[0])
|
||||
possible_name_list = generate_name_from_email(spliteed[0])[0]
|
||||
chinese_name_list = generate_name_from_email(spliteed[0])[1]
|
||||
if len(possible_name_list) >= 2:
|
||||
contact.last_name = possible_name_list[0]
|
||||
contact.first_name = "".join(possible_name_list[1:-1])
|
||||
contact.note = " ".join(chinese_name_list)
|
||||
if len(contact.last_name) > 0 and len(contact.first_name) > 0:
|
||||
generate_contacts.append(contact)
|
||||
write_new_contacts_to_excel(generate_contacts)
|
||||
|
||||
Reference in New Issue
Block a user