diff --git a/src/utils/excel_reader.py b/src/utils/excel_reader.py index 5d1e8b1..e060dd9 100755 --- a/src/utils/excel_reader.py +++ b/src/utils/excel_reader.py @@ -1,9 +1,9 @@ -import json import random import string +from typing import List -import pandas as pandas -import validators as validators +import pandas as pd +import validators from src.db.mongo_manager import MONGO_STORE_MANAGER from src.pojo.contact_pojo import ContactPojo @@ -23,25 +23,22 @@ chinnese_number_prefix = ['13', '15', '18'] def read_links_to_click(file_path): - links_info_in_json = pandas.read_excel(file_path).to_json(orient='records') - # print(links_info_in_json) - for item in json.loads(links_info_in_json): + links_info = pd.read_excel(file_path).to_dict(orient='records') + for item in links_info: link = item['link'] if validators.url(link): print(link) MONGO_STORE_MANAGER.save_links_to_validate(link, "") - else: print("error on link " + link) -def read_contacts(file_name) -> list: +def read_contacts(file_name) -> List[ContactPojo]: print("read file " + file_name) - contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(contact_list_in_json) + contact_dict_list = pd.read_excel(file_name).to_dict(orient='records') contact_list = [] for contact_dict in contact_dict_list: - if contact_dict['name']: + if contact_dict.get('name'): raw_name = contact_dict['name'].strip() name = raw_name.split(' ') last_name = name[0] @@ -49,20 +46,11 @@ def read_contacts(file_name) -> list: first_name = name[-1] else: first_name = ''.join(name[1:len(name)]) - ip_country = "FR" - if contact_dict.get('ip_country') is not None: - ip_country = contact_dict['ip_country'] - store = "random" - if contact_dict.get('store') is not None: - store = contact_dict['store'] - ua = "" - - if contact_dict.get('ua') is not None: - ua = contact_dict['ua'] - source_from = "" - - if contact_dict.get('source_from') is not None: - source_from = contact_dict['source_from'] + + ip_country = contact_dict.get('ip_country', "FR") + store = contact_dict.get('store', "random") + ua = contact_dict.get('ua', "") + source_from = contact_dict.get('source_from', "") contact = ContactPojo(phone_number=contact_dict['phone'], last_name=last_name, @@ -71,11 +59,9 @@ def read_contacts(file_name) -> list: mail=contact_dict['email'], store=store) contact.source_from = source_from if contact_dict.get('serial') is not None: - serial = contact_dict['serial'] - contact.serial = serial + contact.serial = contact_dict['serial'] if contact_dict.get('resident_card_number') is not None: - resident_card_number = contact_dict['resident_card_number'] - contact.resident_card_number = resident_card_number + contact.resident_card_number = contact_dict['resident_card_number'] contact.ip_country = ip_country contact.ua = ua contact_list.append(contact) @@ -84,14 +70,10 @@ def read_contacts(file_name) -> list: class ExcelHelper: - def __init__(self): - self._df = pandas.Series() - - def read_user_agens(self) -> list: - user_agent_in_json = pandas.read_excel( - "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json( + def read_user_agents(self) -> List[str]: + user_agent_dict_list = pd.read_excel( + "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_dict( orient='records') - user_agent_dict_list = json.loads(user_agent_in_json) user_agent_list = [] for user_agent_dict in user_agent_dict_list: user_agent_str = user_agent_dict['user_agent'] @@ -100,41 +82,40 @@ class ExcelHelper: if 'Mac OS' not in user_agent_str: user_agent_list.append(user_agent_dict['user_agent']) print(user_agent_list) + return user_agent_list def check_contact_list(self, file_name): contact_list = read_contacts(file_name) for contact in contact_list: - if contact.first_name is None or len(contact.first_name) == 0: + if not contact.first_name: print("error in firstName for " + contact.mail) - if contact.last_name is None or len(contact.last_name) == 0: + if not contact.last_name: print("error in last_name for " + contact.mail) - if contact.phone is None or len(contact.phone) == 0: + if not contact.phone: print("error in phone for " + contact.mail) - if contact.passport is None or len(contact.passport) == 0: + if not contact.passport: print("error in passport_number for " + contact.mail) - if contact.mail is None or len(contact.mail) == 0: + if not contact.mail: print("error in mail for " + contact.phone_number) def read_mails_and_pwd(self, - file_name='/Users/lpan/Desktop/163.xlsx'): + file_name='/Users/lpan/Desktop/163.xlsx') -> List[MailAddress]: contact_list = [] - mail_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(mail_list_in_json) + contact_dict_list = pd.read_excel(file_name).to_dict(orient='records') for contact_dict in contact_dict_list: - if contact_dict['email']: + if contact_dict.get('email'): mail = contact_dict['email'].strip() pwd = contact_dict['password'] contact = MailAddress(mail, pwd) contact_list.append(contact) return contact_list - def read_names(self, file_name) -> list: - contact_list_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(contact_list_in_json) + def read_names(self, file_name) -> List[ContactPojo]: + contact_dict_list = pd.read_excel(file_name).to_dict(orient='records') contact_list = [] count = 2 for contact_dict in contact_dict_list: - if contact_dict['name']: + if contact_dict.get('name'): raw_name = contact_dict['name'].strip() name = raw_name.split(' ') if len(name) == 1: @@ -163,13 +144,12 @@ class ExcelHelper: return contact_list - def read_email_pojo(self, file_name) -> list: - email_info_in_json = pandas.read_excel(file_name).to_json(orient='records') - contact_dict_list = json.loads(email_info_in_json) + def read_email_pojo(self, file_name) -> List[MailAddress]: + contact_dict_list = pd.read_excel(file_name).to_dict(orient='records') contact_list = [] count = 0 for contact_dict in contact_dict_list: - if contact_dict['email']: + if contact_dict.get('email'): email = contact_dict['email'].strip() password = contact_dict['code'] email_destinaire = MailAddress(email, password) @@ -189,11 +169,12 @@ def get_random_fr_phone_numbers(): def get_random_cn_phone_numbers(): - length = 8 # number of characters in the string. - ran = ''.join(random.choices(string.digits, k=length)) - _phone_number = random.choice(fr_phone_number_prefix) + str(ran) + # CN phone numbers are 11 digits, starting with 1. + # Prefixes are usually 3 digits (e.g., 13x, 15x, 18x). prefix = random.choice(chinnese_number_prefix) - return prefix + _phone_number + length = 11 - len(prefix) + ran = ''.join(random.choices(string.digits, k=length)) + return prefix + ran def get_random_id_number() -> str: @@ -201,13 +182,14 @@ def get_random_id_number() -> str: S = 8 # number of characters in the string. # call random.choices() string module to find the string in Uppercase + numeric data. ran = ''.join(random.choices(string.digits, k=S)) - print("The randomly generated string is : 94" + str(ran)) # print the random data - return ran + result = "94" + str(ran) + print("The randomly generated string is : " + result) # print the random data + return result def save_mails_to_db(): excel_reader = ExcelHelper() - emails = excel_reader.read_email_pojo("~/Downloads/邮箱及密码_26_03_25_yahoo.xlsx") + emails = excel_reader.read_email_pojo("~/Downloads/邮箱及密码_outlook_200.xlsx") print(emails) for mail in emails: MONGO_STORE_MANAGER.save_destinary_emails(mail)