Added method to generate the check_name Excel file
This commit is contained in:
Regular → Executable
+3
-3
@@ -1,9 +1,9 @@
|
|||||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
from src.utils.excel_reader import ExcelHelper
|
from src.utils.excel_reader import read_contacts
|
||||||
|
|
||||||
|
# 检查联系人表的有邮件有没有在数据库中
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
excel_reader = ExcelHelper()
|
contacts = read_contacts("/Users/panlei/Desktop/yahoo_aol_valid_16.xlsx")
|
||||||
contacts = excel_reader.read_contacts("/Users/lpan/Desktop/yahoo_aol.xlsx")
|
|
||||||
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||||
print("mail_list size is " + str(len(mail_list)))
|
print("mail_list size is " + str(len(mail_list)))
|
||||||
mail_raw_list =[]
|
mail_raw_list =[]
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
from src.utils.excel_reader import ExcelHelper
|
from src.utils.excel_reader import read_contacts
|
||||||
|
|
||||||
|
|
||||||
def upload_contacts_list():
|
def upload_contacts_list():
|
||||||
excel_helper = ExcelHelper()
|
_contacts_to_book = read_contacts("/Users/panlei/Desktop/yahoo_aol_valid_25.xlsx")
|
||||||
contacts_to_book = excel_helper.read_contacts("/Users/lpan/Desktop/yahoo_aol.xlsx")
|
return _contacts_to_book
|
||||||
return contacts_to_book
|
|
||||||
|
|
||||||
|
|
||||||
# 把新的联系人存到网上
|
# 把新的联系人存到网上
|
||||||
|
|||||||
Regular → Executable
+7
-6
@@ -7,7 +7,7 @@ import xlsxwriter
|
|||||||
from src.db.mongo_manager import MongoDbManager
|
from src.db.mongo_manager import MongoDbManager
|
||||||
from src.pojo.contact_pojo import ContactPojo
|
from src.pojo.contact_pojo import ContactPojo
|
||||||
from src.utils.excel_reader import get_random_fr_phone_numbers, ExcelHelper
|
from src.utils.excel_reader import get_random_fr_phone_numbers, ExcelHelper
|
||||||
from src.utils.generate_random_passport_id import get_random_passport_id_number
|
from src.utils.contacts.generate_random_passport_id import get_random_passport_id_number
|
||||||
|
|
||||||
|
|
||||||
def get_ordered_combins(stuff):
|
def get_ordered_combins(stuff):
|
||||||
@@ -85,7 +85,7 @@ def has_numbers(inputString):
|
|||||||
|
|
||||||
|
|
||||||
def check_name(word_to_test, pinyin_name_list):
|
def check_name(word_to_test, pinyin_name_list):
|
||||||
if "_" in word_to_test or "." in word_to_test:
|
if "_" in word_to_test or "." in word_to_test or "v" in word_to_test:
|
||||||
return None
|
return None
|
||||||
if has_numbers(word_to_test):
|
if has_numbers(word_to_test):
|
||||||
return None
|
return None
|
||||||
@@ -149,7 +149,7 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
|||||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
# if found_name is not None:
|
# if found_name is not None:
|
||||||
# return found_name
|
# return found_name
|
||||||
if len(word_to_test) >= 1:
|
if len(word_to_test) >= 5:
|
||||||
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
if found_name is not None:
|
if found_name is not None:
|
||||||
return found_name
|
return found_name
|
||||||
@@ -200,10 +200,11 @@ def find_contact(generate_contacts: list, mail, pinyin_name_list):
|
|||||||
def generate_contact_from_mail_list():
|
def generate_contact_from_mail_list():
|
||||||
db_manager = MongoDbManager()
|
db_manager = MongoDbManager()
|
||||||
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
||||||
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
# mail_list = db_manager.get_destination_emails()[7000:7500]
|
||||||
# mail_list = db_manager.get_destination_emails()[7570:7590]
|
# mail_list = db_manager.get_destination_emails()[2201:2400]
|
||||||
|
# mail_list = db_manager.get_destination_emails()[7080:7181]
|
||||||
excel_reader = ExcelHelper()
|
excel_reader = ExcelHelper()
|
||||||
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/gmail_19_05.xlsx")
|
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/panlei/Desktop/toExtract.xlsx")
|
||||||
generate_contacts = []
|
generate_contacts = []
|
||||||
pinyin_name_list = read_pinyin_list_from_file()
|
pinyin_name_list = read_pinyin_list_from_file()
|
||||||
random.shuffle(pinyin_name_list)
|
random.shuffle(pinyin_name_list)
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
from src.utils.excel_reader import read_contacts
|
||||||
|
|
||||||
|
_contact_list = read_contacts(file_name="/Users/panlei/Desktop/yahoo_aol_valid_23.xlsx")

# Print the mail address of every contact whose last name or first name
# is missing (None); contacts that carry both names are skipped.
for _contact in _contact_list:
    if _contact.last_name is not None and _contact.first_name is not None:
        continue
    print(_contact.mail)
|
||||||
Regular → Executable
+4
-4
@@ -57,7 +57,7 @@ def generate_titre_sejour_number(size=10) -> list:
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# for i in range(1,200):
|
# for i in range(1,200):
|
||||||
# print(get_random_id_number())
|
# print(get_random_id_number())
|
||||||
# for i in range(1, 501):
|
for i in range(1, 101):
|
||||||
# print(get_random_passport_id_number())
|
print(get_random_passport_id_number())
|
||||||
for id in generate_titre_sejour_number(3200):
|
# for id in generate_titre_sejour_number(3200):
|
||||||
print(id)
|
# print(id)
|
||||||
@@ -0,0 +1,63 @@
|
|||||||
|
import xlsxwriter
|
||||||
|
|
||||||
|
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
|
from src.utils.excel_reader import read_contacts
|
||||||
|
|
||||||
|
|
||||||
|
class ExportedUser:
    """Plain value holder for one user row: names, password, mail and phone."""

    def __init__(self, last_name, first_name, password, mail, phone):
        # Name fields first, then the account fields, so the attribute
        # order on the instance matches the constructor order.
        self.last_name, self.first_name = last_name, first_name
        self.password, self.mail, self.phone = password, mail, phone
|
||||||
|
|
||||||
|
|
||||||
|
def export_registered_users() -> list:
    """Return every registered user known to ``MONGO_STORE_MANAGER``.

    The number of users fetched is printed to stdout before returning.
    """
    registered_users = MONGO_STORE_MANAGER.get_all_registered_users()
    print(len(registered_users))
    return registered_users
|
||||||
|
|
||||||
|
|
||||||
|
def read_contacts_to_check():
    """Load the contacts to verify from the hard-coded ``check_names.xlsx`` file.

    Returns whatever ``read_contacts`` produces for that workbook.
    """
    return read_contacts(file_name="/Users/panlei/Desktop/check_names.xlsx")
|
||||||
|
|
||||||
|
|
||||||
|
def write_to_excel(_to_export_list):
    """Write the given users to ``check_names_of_contacts_<count>.xlsx``.

    The file name is relative, so the workbook is created in the current
    working directory. Row 0 holds a bold header; each following row holds
    one user: last name, first name, phone (prefixed with "0"), mail and
    password.

    :param _to_export_list: sized iterable of objects exposing ``last_name``,
        ``first_name``, ``phone``, ``mail`` and ``password`` attributes.
    """
    header_data = ['Nom/姓', 'Prénom/名', 'phone/电话', 'mail/邮箱', 'mot de pass/密码']
    file_name = 'check_names_of_contacts_{}.xlsx'.format(len(_to_export_list))

    # The context manager calls close() even when writing a row raises,
    # so the workbook is never left open.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})

        for col_num, data in enumerate(header_data):
            worksheet.write(0, col_num, data, header_format)

        # Data rows start directly below the header row.
        for row, info in enumerate(_to_export_list, start=1):
            # format() instead of "0" + phone: a phone that is not a str
            # (e.g. a number) no longer raises TypeError, and a missing
            # phone is written as an empty cell instead of crashing.
            phone = "" if info.phone is None else "0{}".format(info.phone)
            worksheet.write(row, 0, info.last_name)
            worksheet.write(row, 1, info.first_name)
            worksheet.write(row, 2, phone)
            worksheet.write(row, 3, info.mail)
            worksheet.write(row, 4, info.password)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    from collections import defaultdict

    _user_list = export_registered_users()
    contact_list = read_contacts_to_check()

    # Index the contacts by mail once so every user is matched with a single
    # lookup instead of a scan over the whole contact list. Contacts sharing
    # a mail stay in their original order, so the export order is unchanged.
    # NOTE(review): assumes mail values are hashable (plain strings) - confirm.
    contacts_by_mail = defaultdict(list)
    for contact in contact_list:
        contacts_by_mail[contact.mail].append(contact)

    _to_export = []
    for _user in _user_list:
        for contact in contacts_by_mail.get(_user.mail, ()):
            # Name and phone come from the contact sheet, the password from
            # the registered user record.
            _export_user = ExportedUser(contact.last_name, contact.first_name, _user.password, contact.mail,
                                        contact.phone)
            _to_export.append(_export_user)
    write_to_excel(_to_export)
|
||||||
@@ -1,107 +0,0 @@
|
|||||||
import itertools
|
|
||||||
|
|
||||||
import xlsxwriter
|
|
||||||
from pyhanlp import *
|
|
||||||
from Pinyin2Hanzi import DefaultHmmParams
|
|
||||||
from Pinyin2Hanzi import viterbi
|
|
||||||
from itertools import combinations
|
|
||||||
|
|
||||||
from src.db.mongo_manager import MongoDbManager
|
|
||||||
from src.pojo.contact_pojo import ContactPojo
|
|
||||||
from src.utils.excel_reader import get_random_phone_numbers, generate_email_from_name
|
|
||||||
from src.utils.generate_random_passport_id import get_random_passport_id_number
|
|
||||||
|
|
||||||
|
|
||||||
def get_ordered_combins(stuff):
    """Return every non-empty contiguous slice of ``stuff``.

    Slices are ordered by start index, then by increasing length. Each
    slice is also printed to stdout as it is produced.
    """
    size = len(stuff)
    slices = []
    for start in range(size):
        for stop in range(start + 1, size + 1):
            piece = stuff[start:stop]
            print(piece)
            slices.append(piece)
    return slices
|
|
||||||
|
|
||||||
|
|
||||||
def get_better_list(list):
    """Return the given list object unchanged.

    The filtering that used to drop two-character names is disabled, so
    this is currently an identity function.
    """
    return list
|
|
||||||
|
|
||||||
|
|
||||||
# Keyword extraction
|
|
||||||
|
|
||||||
|
|
||||||
def generate_name_from_email(mail_address):
    """Collect candidate pinyin words and Chinese characters from ``mail_address``.

    The keywords returned by ``HanLP.extractKeyword(mail_address, 2)`` are
    joined into one string, every contiguous slice of that string with at
    least two characters is passed to ``viterbi`` (Pinyin2Hanzi), and the
    results are gathered.

    :param mail_address: text to analyse; the visible caller passes the part
        of a mail address before the "@".
    :return: tuple ``(pinyin_name_list, chinese_name_list)`` - the slices for
        which ``viterbi`` did not raise, and the concatenated ``path``
        entries of every viterbi result.
    """
    key_words = HanLP.extractKeyword(mail_address, 2)
    print(key_words)
    hmmparams = DefaultHmmParams()
    pinyin_name_list = []
    chinese_name_list = []
    setence = "".join(key_words)
    all_combins = get_ordered_combins(setence)
    for i in all_combins:
        word_to_test = "".join(i)
        if len(word_to_test) >= 2:
            # print("word to test is " + word_to_test)
            try:
                result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2)
                for item in result:
                    print(item.score, item.path)
                    chinese_name_list.extend(item.path)
                # print("word is " + word_to_test)
                # if len(word_to_test) >= 3:
                #     all_combins.remove(i)
                # NOTE(review): the source lost its indentation; this append
                # is assumed to run once per slice (after the result loop,
                # still inside the try) - confirm against the original file.
                pinyin_name_list.append(word_to_test)
            except Exception as error:
                # Any failure for one slice is printed and that slice skipped.
                print(error)

    print(pinyin_name_list)
    # Pick the non-duplicated ones (disabled):
    # if len(pinyin_name_list) > 3:
    #     return get_better_list(pinyin_name_list)
    # else:
    return pinyin_name_list, chinese_name_list
|
|
||||||
|
|
||||||
|
|
||||||
def write_new_contacts_to_excel(valid_contacts: list):
    """Write the contacts to ``real_name_contacts_<count>.xlsx``.

    The file name is relative, so the workbook is created in the current
    working directory. Row 0 holds a bold header; each following row holds
    "<last_name> <first_name>", phone, passport, mail and note.

    Side effect: every contact's ``phone`` and ``passport`` attributes are
    overwritten with freshly generated random values before being written.

    :param valid_contacts: list of contact objects exposing ``last_name``,
        ``first_name``, ``mail`` and ``note`` attributes.
    """
    header_data = ['name', 'phone', 'passport', 'email', 'note']
    file_name = 'real_name_contacts_{}.xlsx'.format(len(valid_contacts))

    # The context manager calls close() even when writing a row raises,
    # so the workbook is never left open.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})

        for col_num, data in enumerate(header_data):
            worksheet.write(0, col_num, data, header_format)

        # Data rows start directly below the header row.
        for row, info in enumerate(valid_contacts, start=1):
            info.phone = get_random_phone_numbers()
            info.passport = get_random_passport_id_number()
            worksheet.write(row, 0, "{} {}".format(info.last_name, info.first_name))
            worksheet.write(row, 1, info.phone)
            worksheet.write(row, 2, info.passport)
            worksheet.write(row, 3, info.mail)
            worksheet.write(row, 4, info.note)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    db_manager = MongoDbManager()
    mail_list = db_manager.get_destination_emails()[501:1000]
    # mail_list = db_manager.get_destination_emails()[50:200]
    generate_contacts = []
    for mail in mail_list:
        contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
        local_part = mail.mail.split("@")[0]
        # Analyse the address once and unpack both result lists; calling the
        # generator twice repeated the whole HMM pass and its debug output.
        possible_name_list, chinese_name_list = generate_name_from_email(local_part)
        if len(possible_name_list) < 2:
            continue
        contact.last_name = possible_name_list[0]
        # NOTE(review): [1:-1] drops the last candidate, so a two-element
        # list yields an empty first name and the contact is skipped below -
        # confirm this is intended rather than [1:].
        contact.first_name = "".join(possible_name_list[1:-1])
        contact.note = " ".join(chinese_name_list)
        if len(contact.last_name) > 0 and len(contact.first_name) > 0:
            generate_contacts.append(contact)
    write_new_contacts_to_excel(generate_contacts)
|
|
||||||
@@ -0,0 +1,76 @@
|
|||||||
|
import colorsys
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas
|
||||||
|
import matplotlib.patches as mpatches
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
from src.pojo.ReserveResultPojo import ReserveResultPojo
|
||||||
|
|
||||||
|
|
||||||
|
def read_reserve_pojo_from_excel(file_path: str):
    """Read the Excel file at ``file_path`` and chart its ``source_from`` column.

    The sheet is loaded with ``pandas.read_excel`` and handed to
    ``plot_bargraph_with_groupings``, grouped and coloured by the
    ``source_from`` column. An earlier draft that converted the rows into
    ``ReserveResultPojo`` objects is disabled, so nothing is returned.

    :param file_path: path of the workbook to read.
    """
    print("read file " + file_path)
    reserve_frame = pandas.read_excel(file_path)
    plot_bargraph_with_groupings(
        df=reserve_frame,
        groupby="source_from",
        colourby="source_from",
        title="分布",
        xlabel="型号",
        ylabel="数量",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def plot_bargraph_with_groupings(df, groupby, colourby, title, xlabel, ylabel):
    """
    Plots a dataframe showing the frequency of datapoints grouped by one column and coloured by another.

    df : dataframe
    groupby: the column to groupby
    colourby: the column to color by (may be the same column as groupby)
    title: the graph title
    xlabel: the x label,
    ylabel: the y label

    Returns nothing. The chart is drawn through pandas/matplotlib; this
    function does not call ``plt.show()`` itself.
    """
    # One random hue per distinct colourby value, at full saturation and
    # value, converted from HSV to RGB.
    ind_col_map = {
        key: colorsys.hsv_to_rgb(np.random.rand(), 1, 1)
        for key in df[colourby].unique()
    }

    # Map every groupby value to the colourby value it occurs with.
    # Selecting the same column twice would produce a frame with duplicate
    # labels; looking such a label up yields a DataFrame, zip() would then
    # iterate column labels instead of cell values, and every bar lookup
    # below would raise KeyError. Select the column once in that case.
    pair_columns = [groupby] if groupby == colourby else [groupby, colourby]
    unique_comb = df[pair_columns].drop_duplicates()
    name_ind_map = dict(zip(unique_comb[groupby], unique_comb[colourby]))

    # Bar heights, and one colour per bar in the same order as the bars.
    values_counts = df[groupby].value_counts()
    bar_colours = [ind_col_map[name_ind_map[name]] for name in values_counts.index]

    # Makes the bargraph. The colours are passed as a flat per-bar sequence.
    # NOTE(review): relies on pandas colouring the bars of a Series plot one
    # by one from this sequence - verify on the installed pandas version.
    ax = values_counts.plot(kind='bar',
                            # figsize=FIG_SIZE,
                            title=title,
                            color=bar_colours)

    # Makes a legend using the ind_col_map
    legend_list = [mpatches.Patch(color=colour, label=key)
                   for key, colour in ind_col_map.items()]

    # display the graph.
    plt.legend(handles=legend_list)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Workbook charted when this module is run as a script.
    report_path = "/Users/panlei/2023_10_27.xlsx"
    read_reserve_pojo_from_excel(report_path)
|
||||||
Reference in New Issue
Block a user