Added method to generate the check_name Excel file

This commit is contained in:
2023-11-03 22:15:43 +01:00
parent cf7155f9fb
commit 8d6c4fed35
8 changed files with 163 additions and 124 deletions
+7
View File
@@ -0,0 +1,7 @@
from src.utils.excel_reader import read_contacts
# Report every contact in the spreadsheet that lacks a last name or a first
# name by printing that contact's mail address.
_contact_list = read_contacts(
    file_name="/Users/panlei/Desktop/yahoo_aol_valid_23.xlsx",
)
for _contact in _contact_list:
    _has_both_names = (
        _contact.last_name is not None and _contact.first_name is not None
    )
    if not _has_both_names:
        print(_contact.mail)
@@ -57,7 +57,7 @@ def generate_titre_sejour_number(size=10) -> list:
# Script entry point: prints generated identifiers to stdout, one per line.
# NOTE(review): this span looks like a diff hunk in which the removed and the
# added lines are interleaved without +/- markers — confirm which of the two
# loops below is the current one before relying on it.
if __name__ == '__main__':
# for i in range(1,200):
# print(get_random_id_number())
# for i in range(1, 501):
# print(get_random_passport_id_number())
# Print every value returned by generate_titre_sejour_number(3200).
for id in generate_titre_sejour_number(3200):
print(id)
# Call get_random_passport_id_number 100 times and print each result.
for i in range(1, 101):
print(get_random_passport_id_number())
# for id in generate_titre_sejour_number(3200):
# print(id)
+63
View File
@@ -0,0 +1,63 @@
import xlsxwriter
from src.db.mongo_manager import MONGO_STORE_MANAGER
from src.utils.excel_reader import read_contacts
class ExportedUser:
    """Plain record bundling a user's names, password, mail and phone."""

    def __init__(self, last_name, first_name, password, mail, phone):
        # Names
        self.last_name, self.first_name = last_name, first_name
        # Credentials
        self.password = password
        # Contact details
        self.mail, self.phone = mail, phone
def export_registered_users() -> list:
    """Fetch all registered users from the Mongo store manager.

    Prints how many users were fetched before returning them.

    Returns:
        The value returned by
        ``MONGO_STORE_MANAGER.get_all_registered_users()``.
    """
    registered = MONGO_STORE_MANAGER.get_all_registered_users()
    user_count = len(registered)
    print(user_count)
    return registered
def read_contacts_to_check(file_name="/Users/panlei/Desktop/check_names.xlsx"):
    """Read the contacts whose names have to be checked from an Excel workbook.

    Args:
        file_name: Path of the workbook handed to ``read_contacts``. Defaults
            to the path that used to be hard-coded, so calls without
            arguments behave as before.

    Returns:
        The value returned by ``read_contacts`` for that workbook.
    """
    return read_contacts(file_name=file_name)
def write_to_excel(_to_export_list):
    """Write the given users to ``check_names_of_contacts_<count>.xlsx``.

    The workbook holds a bold header row followed by one row per entry, in
    the order last name, first name, phone, mail, password. The file name is
    relative, so the file lands in the current working directory.

    Args:
        _to_export_list: Sized iterable of objects exposing ``last_name``,
            ``first_name``, ``phone``, ``mail`` and ``password``. ``phone``
            has to be a string because it is prefixed with ``"0"``.

    Raises:
        TypeError: If an entry's ``phone`` cannot be concatenated to ``"0"``.
    """
    header_data = ['Nom/姓', 'Prénom/名', 'phone/电话', 'mail/邮箱', 'mot de pass/密码']
    file_name = 'check_names_of_contacts_{}.xlsx'.format(len(_to_export_list))
    # The context manager closes the workbook even when writing a row raises,
    # instead of leaving it open as the explicit close() at the end did.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})
        worksheet.write_row(0, 0, header_data, header_format)
        # Data rows start directly below the header row.
        for row, info in enumerate(_to_export_list, start=1):
            worksheet.write_row(
                row,
                0,
                [
                    info.last_name,
                    info.first_name,
                    "0" + info.phone,
                    info.mail,
                    info.password,
                ],
            )
if __name__ == '__main__':
    # Pair every registered user with every contact that has the same mail,
    # take the names and phone from the contact and the password from the
    # user, then export the resulting records.
    registered_users = export_registered_users()
    contacts_to_check = read_contacts_to_check()
    matched = [
        ExportedUser(
            contact.last_name,
            contact.first_name,
            user.password,
            contact.mail,
            contact.phone,
        )
        for user in registered_users
        for contact in contacts_to_check
        if user.mail == contact.mail
    ]
    write_to_excel(matched)
-107
View File
@@ -1,107 +0,0 @@
import itertools
import xlsxwriter
from pyhanlp import *
from Pinyin2Hanzi import DefaultHmmParams
from Pinyin2Hanzi import viterbi
from itertools import combinations
from src.db.mongo_manager import MongoDbManager
from src.pojo.contact_pojo import ContactPojo
from src.utils.excel_reader import get_random_phone_numbers, generate_email_from_name
from src.utils.generate_random_passport_id import get_random_passport_id_number
def get_ordered_combins(stuff):
    """Return every non-empty contiguous slice of ``stuff``.

    Slices are ordered by start index, then by end index, and each one is
    printed as it is collected.
    """
    slices = []
    length = len(stuff)
    for start in range(length):
        for stop in range(start + 1, length + 1):
            piece = stuff[start:stop]
            print(piece)
            slices.append(piece)
    return slices
def get_better_list(list):
    """Return the given list unchanged.

    Currently an identity pass-through: no filtering is applied.
    """
    names = list
    return names
# Keyword extraction
# Builds two lists from the given text: the substrings (length >= 2) of the
# joined HanLP keywords that were run through viterbi, and the path entries
# that viterbi returned for them.
# mail_address: text handed to HanLP.extractKeyword.
# Returns the tuple (pinyin_name_list, chinese_name_list).
# NOTE(review): indentation is lost in this view, so the exact nesting of the
# append and of the final print relative to the try/for blocks cannot be
# confirmed from here.
def generate_name_from_email(mail_address):
# NOTE(review): the second argument is presumably the number of keywords to extract — confirm.
key_words = HanLP.extractKeyword(mail_address, 2)
print(key_words)
hmmparams = DefaultHmmParams()
pinyin_name_list = []
chinese_name_list = []
# Join the keywords into one string and enumerate all of its contiguous slices.
setence = "".join(key_words)
all_combins = get_ordered_combins(setence)
for i in all_combins:
word_to_test = "".join(i)
# Single-character candidates are skipped.
if len(word_to_test) >= 2:
# print("word to test is " + word_to_test)
try:
# The candidate is passed to viterbi as a one-element observation tuple.
result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2)
for item in result:
print(item.score, item.path)
chinese_name_list.extend(item.path)
# print("word is " + word_to_test)
# if len(word_to_test) >= 3:
# all_combins.remove(i)
pinyin_name_list.append(word_to_test)
# Any failure is printed and otherwise ignored.
except Exception as error:
print(error)
print(pinyin_name_list)
# Pick the ones that are not duplicated
# if len(pinyin_name_list) > 3:
# return get_better_list(pinyin_name_list)
# else:
return pinyin_name_list, chinese_name_list
def write_new_contacts_to_excel(valid_contacts: list):
    """Write the given contacts to ``real_name_contacts_<count>.xlsx``.

    The workbook holds a bold header row followed by one row per contact:
    "<last_name> <first_name>", phone, passport, mail and note.

    Side effect: every contact's ``phone`` and ``passport`` attributes are
    overwritten with values from ``get_random_phone_numbers()`` and
    ``get_random_passport_id_number()`` before the row is written.

    Args:
        valid_contacts: Contacts exposing ``last_name``, ``first_name``,
            ``mail`` and ``note``.
    """
    header_data = ['name', 'phone', 'passport', 'email', 'note']
    file_name = 'real_name_contacts_{}.xlsx'.format(len(valid_contacts))
    # The context manager closes the workbook even when writing a row raises,
    # instead of leaving it open as the explicit close() at the end did.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})
        worksheet.write_row(0, 0, header_data, header_format)
        # Data rows start directly below the header row.
        for row, info in enumerate(valid_contacts, start=1):
            info.phone = get_random_phone_numbers()
            info.passport = get_random_passport_id_number()
            worksheet.write_row(
                row,
                0,
                [
                    "{} {}".format(info.last_name, info.first_name),
                    info.phone,
                    info.passport,
                    info.mail,
                    info.note,
                ],
            )
if __name__ == '__main__':
    # Derive a last/first name for a slice of the destination mail addresses
    # and export the contacts for which both names could be derived.
    db_manager = MongoDbManager()
    mail_list = db_manager.get_destination_emails()[501:1000]
    # mail_list = db_manager.get_destination_emails()[50:200]
    generate_contacts = []
    for mail in mail_list:
        contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
        local_part = mail.mail.split("@")[0]
        # Run the name extraction once per address and unpack both results;
        # it used to be called twice, doing the same work (and printing the
        # same debug output) a second time.
        possible_name_list, chinese_name_list = generate_name_from_email(local_part)
        if len(possible_name_list) >= 2:
            contact.last_name = possible_name_list[0]
            # NOTE(review): [1:-1] leaves out the last fragment, so exactly
            # two fragments give an empty first name — confirm this is intended.
            contact.first_name = "".join(possible_name_list[1:-1])
            contact.note = " ".join(chinese_name_list)
            # Keep the contact only when both names are non-empty.
            if contact.last_name and contact.first_name:
                generate_contacts.append(contact)
    write_new_contacts_to_excel(generate_contacts)
+76
View File
@@ -0,0 +1,76 @@
import colorsys
import json
import random
import numpy as np
import pandas
import matplotlib.patches as mpatches
from matplotlib import pyplot as plt
from src.pojo.ReserveResultPojo import ReserveResultPojo
def read_reserve_pojo_from_excel(file_path: str):
    """Load an Excel workbook and plot its ``source_from`` distribution.

    The workbook is read into a pandas DataFrame, which is handed to
    ``plot_bargraph_with_groupings`` grouped and coloured by the
    ``source_from`` column. Nothing is returned.

    Args:
        file_path: Path of the workbook to read.
    """
    print("read file " + file_path)
    frame = pandas.read_excel(file_path)
    plot_bargraph_with_groupings(
        df=frame,
        groupby="source_from",
        colourby="source_from",
        title="分布",
        xlabel="型号",
        ylabel="数量",
    )
def plot_bargraph_with_groupings(df, groupby, colourby, title, xlabel, ylabel):
    """
    Plots a bar chart of how often each value of one column occurs, with each
    bar coloured according to another column.

    df : dataframe
    groupby: the column to group by (one bar per distinct value)
    colourby: the column to colour by; may be the same column as groupby
    title: the graph title
    xlabel: the x label
    ylabel: the y label

    Colours are random hues, so they differ between runs. The function adds a
    legend and axis labels but does not call plt.show().
    """
    # One random, fully saturated hue per distinct colourby value, converted
    # from HSV to RGB.
    ind_col_map = {
        key: colorsys.hsv_to_rgb(np.random.rand(), 1, 1)
        for key in df[colourby].unique()
    }

    # Map every groupby value to its colourby value. The two columns are
    # paired element-wise instead of selecting df[[groupby, colourby]]: when
    # both names refer to the same column that selection has duplicate column
    # labels, indexing it by label returns a frame rather than a column, and
    # the colour lookup below then failed with a KeyError.
    # dict.fromkeys keeps each distinct pair once, in first-seen order.
    unique_pairs = dict.fromkeys(zip(df[groupby], df[colourby]))
    name_ind_map = {
        group_value: colour_value
        for group_value, colour_value in unique_pairs
    }

    values_counts = df[groupby].value_counts()
    # Colour of each bar, in the order the bars are drawn.
    c = values_counts.index.map(lambda x: ind_col_map[name_ind_map[x]])

    # Makes the bargraph.
    ax = values_counts.plot(kind='bar',
                            # figsize=FIG_SIZE,
                            title=title,
                            color=[c.values])

    # Makes a legend using the ind_col_map.
    legend_list = [
        mpatches.Patch(color=colour, label=key)
        for key, colour in ind_col_map.items()
    ]
    plt.legend(handles=legend_list)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
if __name__ == '__main__':
    # Read the workbook at the fixed path below and plot its distribution.
    read_reserve_pojo_from_excel(file_path="/Users/panlei/2023_10_27.xlsx")