Added method to generate the check_name Excel file
This commit is contained in:
Regular → Executable
+3
-3
@@ -1,9 +1,9 @@
|
||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||
from src.utils.excel_reader import ExcelHelper
|
||||
from src.utils.excel_reader import read_contacts
|
||||
|
||||
# 检查联系人表的有邮件有没有在数据库中
|
||||
if __name__ == '__main__':
|
||||
excel_reader = ExcelHelper()
|
||||
contacts = excel_reader.read_contacts("/Users/lpan/Desktop/yahoo_aol.xlsx")
|
||||
contacts = read_contacts("/Users/panlei/Desktop/yahoo_aol_valid_16.xlsx")
|
||||
mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||
print("mail_list size is " + str(len(mail_list)))
|
||||
mail_raw_list =[]
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||
from src.utils.excel_reader import ExcelHelper
|
||||
from src.utils.excel_reader import read_contacts
|
||||
|
||||
|
||||
def upload_contacts_list():
|
||||
excel_helper = ExcelHelper()
|
||||
contacts_to_book = excel_helper.read_contacts("/Users/lpan/Desktop/yahoo_aol.xlsx")
|
||||
return contacts_to_book
|
||||
_contacts_to_book = read_contacts("/Users/panlei/Desktop/yahoo_aol_valid_25.xlsx")
|
||||
return _contacts_to_book
|
||||
|
||||
|
||||
# 把新的联系人存到网上
|
||||
|
||||
Regular → Executable
+7
-6
@@ -7,7 +7,7 @@ import xlsxwriter
|
||||
from src.db.mongo_manager import MongoDbManager
|
||||
from src.pojo.contact_pojo import ContactPojo
|
||||
from src.utils.excel_reader import get_random_fr_phone_numbers, ExcelHelper
|
||||
from src.utils.generate_random_passport_id import get_random_passport_id_number
|
||||
from src.utils.contacts.generate_random_passport_id import get_random_passport_id_number
|
||||
|
||||
|
||||
def get_ordered_combins(stuff):
|
||||
@@ -85,7 +85,7 @@ def has_numbers(inputString):
|
||||
|
||||
|
||||
def check_name(word_to_test, pinyin_name_list):
|
||||
if "_" in word_to_test or "." in word_to_test:
|
||||
if "_" in word_to_test or "." in word_to_test or "v" in word_to_test:
|
||||
return None
|
||||
if has_numbers(word_to_test):
|
||||
return None
|
||||
@@ -149,7 +149,7 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||
# if found_name is not None:
|
||||
# return found_name
|
||||
if len(word_to_test) >= 1:
|
||||
if len(word_to_test) >= 5:
|
||||
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||
if found_name is not None:
|
||||
return found_name
|
||||
@@ -200,10 +200,11 @@ def find_contact(generate_contacts: list, mail, pinyin_name_list):
|
||||
def generate_contact_from_mail_list():
|
||||
db_manager = MongoDbManager()
|
||||
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
||||
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
||||
# mail_list = db_manager.get_destination_emails()[7570:7590]
|
||||
# mail_list = db_manager.get_destination_emails()[7000:7500]
|
||||
# mail_list = db_manager.get_destination_emails()[2201:2400]
|
||||
# mail_list = db_manager.get_destination_emails()[7080:7181]
|
||||
excel_reader = ExcelHelper()
|
||||
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/gmail_19_05.xlsx")
|
||||
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/panlei/Desktop/toExtract.xlsx")
|
||||
generate_contacts = []
|
||||
pinyin_name_list = read_pinyin_list_from_file()
|
||||
random.shuffle(pinyin_name_list)
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
from src.utils.excel_reader import read_contacts
|
||||
|
||||
# Load the contact sheet and print the mail of every contact that lacks a
# last name or a first name.
_contact_list = read_contacts(file_name="/Users/panlei/Desktop/yahoo_aol_valid_23.xlsx")
for _contact in _contact_list:
    if _contact.last_name is not None and _contact.first_name is not None:
        continue  # both name parts are present, nothing to report
    print(_contact.mail)
|
||||
Regular → Executable
+4
-4
@@ -57,7 +57,7 @@ def generate_titre_sejour_number(size=10) -> list:
|
||||
if __name__ == '__main__':
|
||||
# for i in range(1,200):
|
||||
# print(get_random_id_number())
|
||||
# for i in range(1, 501):
|
||||
# print(get_random_passport_id_number())
|
||||
for id in generate_titre_sejour_number(3200):
|
||||
print(id)
|
||||
for i in range(1, 101):
|
||||
print(get_random_passport_id_number())
|
||||
# for id in generate_titre_sejour_number(3200):
|
||||
# print(id)
|
||||
@@ -0,0 +1,63 @@
|
||||
import xlsxwriter
|
||||
|
||||
from src.db.mongo_manager import MONGO_STORE_MANAGER
|
||||
from src.utils.excel_reader import read_contacts
|
||||
|
||||
|
||||
class ExportedUser:
    """Plain record holding the names, password, mail and phone of one exported row."""

    def __init__(self, last_name, first_name, password, mail, phone):
        # Every constructor argument is stored under the attribute of the same name.
        self.last_name, self.first_name = last_name, first_name
        self.password, self.mail, self.phone = password, mail, phone
|
||||
|
||||
|
||||
def export_registered_users() -> list:
    """Return the result of ``MONGO_STORE_MANAGER.get_all_registered_users()``.

    The number of users fetched is printed before returning.
    """
    registered = MONGO_STORE_MANAGER.get_all_registered_users()
    print(len(registered))
    return registered
|
||||
|
||||
|
||||
def read_contacts_to_check(file_name="/Users/panlei/Desktop/check_names.xlsx"):
    """Read the contacts whose names are to be checked.

    file_name: path of the workbook handed to ``read_contacts``; the default
        is the path this function used to hard-code, so existing callers
        that pass no argument are unaffected.

    Returns whatever ``read_contacts`` returns for that file.
    """
    return read_contacts(file_name=file_name)
|
||||
|
||||
|
||||
def write_to_excel(_to_export_list):
    """Write the given entries to ``check_names_of_contacts_<count>.xlsx``.

    The sheet gets a bold header row followed by one row per entry holding,
    in order: last name, first name, phone (prefixed with "0"), mail and
    password.

    _to_export_list: sized iterable of objects exposing ``last_name``,
        ``first_name``, ``phone``, ``mail`` and ``password`` attributes.
        ``phone`` is concatenated to a string, so it must be a ``str``.
    """
    header_data = ['Nom/姓', 'Prénom/名', 'phone/电话', 'mail/邮箱', 'mot de pass/密码']
    file_name = 'check_names_of_contacts_{}.xlsx'.format(len(_to_export_list))
    # The context manager closes the workbook even when writing a row raises,
    # so the underlying file is never left open.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})

        for col_num, data in enumerate(header_data):
            worksheet.write(0, col_num, data, header_format)

        # Data rows start directly below the header row.
        for row, info in enumerate(_to_export_list, start=1):
            worksheet.write(row, 0, info.last_name)
            worksheet.write(row, 1, info.first_name)
            worksheet.write(row, 2, "0" + info.phone)
            worksheet.write(row, 3, info.mail)
            worksheet.write(row, 4, info.password)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Pair every registered user with the contacts sharing its mail address
    # and export the pairs to an Excel sheet.
    _user_list = export_registered_users()
    contact_list = read_contacts_to_check()

    # Index the contacts by mail once, so each user is matched with a dict
    # lookup instead of a scan over the whole contact list. Contacts sharing
    # a mail keep their original relative order.
    _contacts_by_mail = {}
    for contact in contact_list:
        _contacts_by_mail.setdefault(contact.mail, []).append(contact)

    _to_export = []
    for _user in _user_list:
        for contact in _contacts_by_mail.get(_user.mail, ()):
            # Names, mail and phone come from the contact sheet; only the
            # password is taken from the registered user.
            _to_export.append(
                ExportedUser(contact.last_name, contact.first_name, _user.password, contact.mail,
                             contact.phone))
    write_to_excel(_to_export)
|
||||
@@ -1,107 +0,0 @@
|
||||
import itertools
|
||||
|
||||
import xlsxwriter
|
||||
from pyhanlp import *
|
||||
from Pinyin2Hanzi import DefaultHmmParams
|
||||
from Pinyin2Hanzi import viterbi
|
||||
from itertools import combinations
|
||||
|
||||
from src.db.mongo_manager import MongoDbManager
|
||||
from src.pojo.contact_pojo import ContactPojo
|
||||
from src.utils.excel_reader import get_random_phone_numbers, generate_email_from_name
|
||||
from src.utils.generate_random_passport_id import get_random_passport_id_number
|
||||
|
||||
|
||||
def get_ordered_combins(stuff):
    """Return every non-empty contiguous slice of ``stuff``, printing each one.

    Slices are ordered by start index first, then by end index.
    """
    slices = []
    end_limit = len(stuff) + 1
    for start in range(end_limit):
        for stop in range(start + 1, end_limit):
            piece = stuff[start:stop]
            print(piece)
            slices.append(piece)
    return slices
|
||||
|
||||
|
||||
def get_better_list(list):
    """Return the argument unchanged; no filtering is applied."""
    unchanged = list
    return unchanged
|
||||
|
||||
|
||||
# 关键词提取
|
||||
|
||||
|
||||
# Derive candidate names from mail_address.
# Keywords returned by HanLP.extractKeyword are joined into one string, every
# contiguous slice of that string (via get_ordered_combins) with at least two
# characters is passed to viterbi, and the outcome is returned as the tuple
# (pinyin_name_list, chinese_name_list).
# NOTE(review): indentation is lost in this view, so it cannot be told whether
# pinyin_name_list.append runs once per candidate or once per viterbi result.
def generate_name_from_email(mail_address):
|
||||
# second argument is presumably the number of keywords requested - TODO confirm
key_words = HanLP.extractKeyword(mail_address, 2)
|
||||
print(key_words)
|
||||
hmmparams = DefaultHmmParams()
|
||||
pinyin_name_list = []
|
||||
chinese_name_list = []
|
||||
# all keywords are concatenated before the slices are enumerated
setence = "".join(key_words)
|
||||
all_combins = get_ordered_combins(setence)
|
||||
for i in all_combins:
|
||||
word_to_test = "".join(i)
|
||||
# candidates shorter than two characters are ignored
if len(word_to_test) >= 2:
|
||||
# print("word to test is " + word_to_test)
|
||||
try:
|
||||
result = viterbi(hmm_params=hmmparams, observations=(word_to_test,), path_num=2)
|
||||
for item in result:
|
||||
print(item.score, item.path)
|
||||
# every element of the result path is collected
chinese_name_list.extend(item.path)
|
||||
# print("word is " + word_to_test)
|
||||
# if len(word_to_test) >= 3:
|
||||
# all_combins.remove(i)
|
||||
pinyin_name_list.append(word_to_test)
|
||||
# any failure for a candidate is printed and the loop carries on
except Exception as error:
|
||||
print(error)
|
||||
|
||||
print(pinyin_name_list)
|
||||
# pick the non-duplicated ones (disabled)
|
||||
# if len(pinyin_name_list) > 3:
|
||||
# return get_better_list(pinyin_name_list)
|
||||
# else:
|
||||
return pinyin_name_list, chinese_name_list
|
||||
|
||||
|
||||
def write_new_contacts_to_excel(valid_contacts: list):
    """Write the contacts to ``real_name_contacts_<count>.xlsx``.

    The sheet gets a bold header row followed by one row per contact holding,
    in order: "<last_name> <first_name>", phone, passport, mail and note.

    valid_contacts: contacts exposing ``last_name``, ``first_name``, ``mail``
        and ``note``. Each contact's ``phone`` and ``passport`` attributes
        are overwritten with values from ``get_random_phone_numbers()`` and
        ``get_random_passport_id_number()`` before its row is written.
    """
    header_data = ['name', 'phone', 'passport', 'email', 'note']
    file_name = 'real_name_contacts_{}.xlsx'.format(len(valid_contacts))
    # The context manager closes the workbook even when writing a row raises,
    # so the underlying file is never left open.
    with xlsxwriter.Workbook(file_name) as workbook:
        worksheet = workbook.add_worksheet()
        header_format = workbook.add_format({'bold': True})

        for col_num, data in enumerate(header_data):
            worksheet.write(0, col_num, data, header_format)

        # Data rows start directly below the header row.
        for row, info in enumerate(valid_contacts, start=1):
            info.phone = get_random_phone_numbers()
            info.passport = get_random_passport_id_number()
            worksheet.write(row, 0, "{} {}".format(info.last_name, info.first_name))
            worksheet.write(row, 1, info.phone)
            worksheet.write(row, 2, info.passport)
            worksheet.write(row, 3, info.mail)
            worksheet.write(row, 4, info.note)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Build contacts from a slice of the destination e-mail addresses and
    # export those for which both a last name and a first name were derived.
    db_manager = MongoDbManager()
    mail_list = db_manager.get_destination_emails()[501:1000]
    generate_contacts = []
    for mail in mail_list:
        contact = ContactPojo(mail=mail.mail, phone_number="", passport_number="", last_name="", first_name="")
        local_part = mail.mail.split("@")[0]
        # A single call yields both lists; calling generate_name_from_email
        # twice repeated the whole extraction for the same input.
        possible_name_list, chinese_name_list = generate_name_from_email(local_part)
        if len(possible_name_list) >= 2:
            contact.last_name = possible_name_list[0]
            # NOTE(review): the [1:-1] slice leaves out the last candidate,
            # so exactly two candidates give an empty first name - confirm
            # this is intended.
            contact.first_name = "".join(possible_name_list[1:-1])
            contact.note = " ".join(chinese_name_list)
        # Contacts whose names stayed empty are not exported.
        if len(contact.last_name) > 0 and len(contact.first_name) > 0:
            generate_contacts.append(contact)
    write_new_contacts_to_excel(generate_contacts)
|
||||
@@ -0,0 +1,76 @@
|
||||
import colorsys
|
||||
import json
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import pandas
|
||||
import matplotlib.patches as mpatches
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
from src.pojo.ReserveResultPojo import ReserveResultPojo
|
||||
|
||||
|
||||
def read_reserve_pojo_from_excel(file_path: str):
    """Load the Excel sheet at ``file_path`` and chart its ``source_from`` column.

    The sheet is read with ``pandas.read_excel`` and handed to
    ``plot_bargraph_with_groupings``, grouped and coloured by ``source_from``.
    Nothing is returned.
    """
    print("read file " + file_path)
    column = "source_from"
    sheet = pandas.read_excel(file_path)
    plot_bargraph_with_groupings(sheet, column, column, "分布", "型号", "数量")
|
||||
|
||||
|
||||
def plot_bargraph_with_groupings(df, groupby, colourby, title, xlabel, ylabel):
    """
    Plots a dataframe showing the frequency of datapoints grouped by one column and coloured by another.

    df : dataframe
    groupby: the column to groupby
    colourby: the column to color by (may be the same column as groupby)
    title: the graph title
    xlabel: the x label,
    ylabel: the y label

    Every distinct colourby value is given a random, fully saturated colour.
    The bar chart and its legend are created, but the figure is neither shown
    nor saved here, and nothing is returned.
    """
    # One random hue per distinct colourby value, converted from HSV to RGB.
    ind_col_map = {
        key: colorsys.hsv_to_rgb(np.random.rand(), 1, 1)
        for key in df[colourby].unique()
    }

    # Map each group to its colour key. The two columns are selected one at a
    # time: selecting both in a single df[[groupby, colourby]] produces
    # duplicate column labels when groupby == colourby, and iterating that
    # result walks the column names instead of the row values, which made the
    # colour lookup below raise KeyError.
    # dict.fromkeys drops repeated pairs while keeping first-seen order, so a
    # group that occurs with several colour keys resolves as it did before.
    unique_pairs = dict.fromkeys(zip(df[groupby], df[colourby]))
    name_ind_map = {group: colour_key for group, colour_key in unique_pairs}

    values_counts = df[groupby].value_counts()
    c = values_counts.index.map(lambda x: ind_col_map[name_ind_map[x]])

    # Makes the bargraph, reusing the counts computed above.
    ax = values_counts.plot(kind='bar',
                            # figsize=FIG_SIZE,
                            title=title,
                            color=[c.values])

    # Makes a legend using the ind_col_map
    legend_list = [
        mpatches.Patch(color=colour, label=key)
        for key, colour in ind_col_map.items()
    ]
    plt.legend(handles=legend_list)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Chart the source_from distribution of the given workbook.
    report_path = "/Users/panlei/2023_10_27.xlsx"
    read_reserve_pojo_from_excel(report_path)
|
||||
Reference in New Issue
Block a user