can read links excel

This commit is contained in:
2023-05-17 12:16:10 +02:00
parent e19753a531
commit d4febdf96c
6 changed files with 59 additions and 26 deletions
+7
View File
@@ -259,12 +259,19 @@ class MongoDbManager:
def save_links_to_validate(self, link: str, mail_address: str):
    """Upsert a link into the ``LINKS_TO_VALIDATE`` collection.

    The document is matched on ``_id == link`` and written with
    ``upsert=True``, so saving the same link again replaces the stored
    document instead of creating a second one.

    :param link: URL to store; used both as the ``_id`` filter value and as
        the ``url`` field of the document.
    :param mail_address: address to store in the ``email`` field. When it is
        empty (or ``None``) the ``email`` field is left out of the document.
    """
    # Build the document once; field order matches the previous two-branch
    # version (url, email, updated_at).
    replacement = {u'url': link}
    if mail_address:
        replacement[u'email'] = mail_address
    # NOTE: the timestamp format is time-of-day only (no date component).
    replacement["updated_at"] = time.strftime("%H:%M:%S", time.localtime())

    self.db[LINKS_TO_VALIDATE].replace_one(
        filter={'_id': link},
        replacement=replacement,
        upsert=True,
    )
MONGO_STORE_MANAGER = MongoDbManager() MONGO_STORE_MANAGER = MongoDbManager()
+5 -3
View File
@@ -13,16 +13,18 @@ class MailManager:
filter(lambda filtered_item: filtered_item.url_validated is None or filtered_item.url_validated is False, filter(lambda filtered_item: filtered_item.url_validated is None or filtered_item.url_validated is False,
successful_items)) successful_items))
for item in not_validated_items: for item in not_validated_items:
if "@gmail" not in item.email and "@163" not in item.email: if "@gmail" not in item.email and "@163" not in item.email and "@hotmail" not in item.email:
invalid_contacts.append( invalid_contacts.append(
ContactPojo(phone_number=item.phone, passport_number=item.passport, last_name=item.lastName, ContactPojo(phone_number=item.phone, passport_number=item.passport, last_name=item.lastName,
first_name=item.firstName, mail=item.email, )) first_name=item.firstName, mail=item.email, ))
return invalid_contacts return invalid_contacts
def get_valid_emails_for_day(self): def get_valid_emails_for_day(self):
valid_contacts = [] valid_contacts = []
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
validated_items = list( validated_items = list(
filter(lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True, filter(
lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True,
successful_items)) successful_items))
for item in validated_items: for item in validated_items:
if "@aol" in item.email: if "@aol" in item.email:
@@ -56,4 +58,4 @@ class MailManager:
if __name__ == '__main__': if __name__ == '__main__':
manager = MailManager() manager = MailManager()
manager.write_invalid_contacts_to_excel(manager.get_invalid_emails_for_day()) manager.write_invalid_contacts_to_excel(manager.get_invalid_emails_for_day())
manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day()) # manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day())
+5 -3
View File
@@ -199,9 +199,9 @@ class MailReader():
def need_to_valid_url(url: str, successful_items) -> bool: def need_to_valid_url(url: str, successful_items) -> bool:
# return True return True
if len(successful_items) == 0: # if len(successful_items) == 0:
return False # return False
print("url is :" + url) print("url is :" + url)
parts = url.split('/') parts = url.split('/')
id = parts[5] id = parts[5]
@@ -214,6 +214,8 @@ def need_to_valid_url(url: str, successful_items) -> bool:
return not item.url_validated return not item.url_validated
else: else:
# if url_validated is None # if url_validated is None
if item.url_validated is not None:
return not item.url_validated
return True return True
return True return True
else: else:
@@ -38,6 +38,8 @@ def read_pinyin_list_from_file() -> list:
print("Line{}: {}".format(count, line.strip())) print("Line{}: {}".format(count, line.strip()))
name_list.append(line.strip()) name_list.append(line.strip())
return name_list return name_list
def read_pinyin_first_name_from_file() -> list: def read_pinyin_first_name_from_file() -> list:
file2 = open('first_name_noDuplicates.txt', 'r') file2 = open('first_name_noDuplicates.txt', 'r')
lines = file2.readlines() lines = file2.readlines()
@@ -49,6 +51,7 @@ def read_pinyin_first_name_from_file() -> list:
name_list.append(line.strip()) name_list.append(line.strip())
return name_list return name_list
def read_pinyin_last_name_from_file() -> list: def read_pinyin_last_name_from_file() -> list:
file2 = open('last_name_noDuplicates.txt', 'r') file2 = open('last_name_noDuplicates.txt', 'r')
lines = file2.readlines() lines = file2.readlines()
@@ -76,8 +79,11 @@ def generate_new_list_from_old_name_list():
f.write(str(new_name)) f.write(str(new_name))
f.close() f.close()
def has_numbers(inputString):
    """Return True when at least one character of ``inputString`` is a digit."""
    for symbol in inputString:
        if symbol.isdigit():
            return True
    return False
def check_name(word_to_test, pinyin_name_list): def check_name(word_to_test, pinyin_name_list):
if "_" in word_to_test or "." in word_to_test: if "_" in word_to_test or "." in word_to_test:
return None return None
@@ -106,9 +112,13 @@ def generate_name_from_email(mail_address, pinyin_name_list):
print("generate for " + mail_address) print("generate for " + mail_address)
all_combins = get_ordered_combins(mail_address) all_combins = get_ordered_combins(mail_address)
all_combins.sort(key=len, reverse=True) all_combins.sort(key=len, reverse=True)
no_duplicated_list = []
for word in all_combins:
if word not in no_duplicated_list:
no_duplicated_list.append(word)
print(all_combins) print(all_combins)
for i in all_combins: for i in no_duplicated_list:
word_to_test = "".join(i) word_to_test = "".join(i)
print("word to test is " + word_to_test) print("word to test is " + word_to_test)
# if len(word_to_test) >= 11: # if len(word_to_test) >= 11:
@@ -139,7 +149,7 @@ def generate_name_from_email(mail_address, pinyin_name_list):
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) # found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
# if found_name is not None: # if found_name is not None:
# return found_name # return found_name
if len(word_to_test) >= 4: if len(word_to_test) >= 1:
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list) found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
if found_name is not None: if found_name is not None:
return found_name return found_name
@@ -191,13 +201,13 @@ def generate_contact_from_mail_list():
db_manager = MongoDbManager() db_manager = MongoDbManager()
# mail_list = db_manager.get_destination_emails()[6000:7000] # mail_list = db_manager.get_destination_emails()[6000:7000]
# mail_list = db_manager.get_destination_emails()[3001:3200] # mail_list = db_manager.get_destination_emails()[3001:3200]
mail_list = db_manager.get_destination_emails()[7570:7590] # mail_list = db_manager.get_destination_emails()[7570:7590]
# excel_reader = ExcelHelper() excel_reader = ExcelHelper()
# mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx") mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/163_list_60.xlsx")
generate_contacts = [] generate_contacts = []
pinyin_name_list = read_pinyin_list_from_file() pinyin_name_list = read_pinyin_list_from_file()
random.shuffle(pinyin_name_list) random.shuffle(pinyin_name_list)
with ThreadPoolExecutor(max_workers=500) as executor: with ThreadPoolExecutor(max_workers=len(mail_list)) as executor:
for mail in mail_list: for mail in mail_list:
executor.submit(find_contact, generate_contacts, mail, pinyin_name_list) executor.submit(find_contact, generate_contacts, mail, pinyin_name_list)
# for mail in mail_list: # for mail in mail_list:
+2
View File
@@ -53,6 +53,8 @@ class ReserveResultPojo:
url = "" url = ""
if 'id' in source: if 'id' in source:
id = source['id'] id = source['id']
if '_id' in source:
id = source['_id']
else: else:
id = "" id = ""
if 'email' in source: if 'email' in source:
+17 -7
View File
@@ -3,6 +3,7 @@ import random
import string import string
import pandas as pandas import pandas as pandas
import validators as validators
import xlsxwriter import xlsxwriter
from src.config import CONTACT_LIST_FILE from src.config import CONTACT_LIST_FILE
@@ -14,16 +15,24 @@ from src.utils.generate_random_passport_id import get_random_passport_id_number
phone_number_prefix = ['6'] phone_number_prefix = ['6']
def read_links_to_click(file_path):
    """Load links from an Excel sheet and store every valid URL for validation.

    The sheet is read with ``pandas.read_excel`` and must contain a ``link``
    column. Each value that passes ``validators.url`` is printed and saved
    through ``MONGO_STORE_MANAGER.save_links_to_validate`` with an empty mail
    address; every other value is reported on stdout and skipped.

    :param file_path: path of the Excel file to read.
    """
    links_info_in_json = pandas.read_excel(file_path).to_json(orient='records')
    for item in json.loads(links_info_in_json):
        link = item['link']
        # A blank cell comes back as None (JSON null) and a numeric cell as a
        # number, so make sure we really have text before validating it.
        if isinstance(link, str) and validators.url(link):
            print(link)
            MONGO_STORE_MANAGER.save_links_to_validate(link, "")
        else:
            # str() keeps the message working for non-string cell values,
            # which would otherwise break the string concatenation.
            print("error on link " + str(link))
class ExcelHelper: class ExcelHelper:
def __init__(self): def __init__(self):
self._df = pandas.Series() self._df = pandas.Series()
def write_to_exel(self, file_name, data_list: list):
new_df = pandas.Series(data_list)
self._df = pandas.concat([self._df, new_df])
self._df.to_excel(file_name)
def read_user_agens(self) -> list: def read_user_agens(self) -> list:
user_agent_in_json = pandas.read_excel( user_agent_in_json = pandas.read_excel(
"/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json( "/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json(
@@ -231,8 +240,9 @@ if __name__ == '__main__':
# contacts = excel_reader.read_names("/Users/lpan/Downloads/gmail_10.xlsx") # contacts = excel_reader.read_names("/Users/lpan/Downloads/gmail_10.xlsx")
# print(contacts) # print(contacts)
# write_new_contacts_to_excel(valid_contacts=contacts) # write_new_contacts_to_excel(valid_contacts=contacts)
excel_reader = ExcelHelper() # excel_reader = ExcelHelper()
excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx") # excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx")
read_links_to_click("/Users/lpan/Downloads/链接.xlsx")
# save_mails_to_db() # save_mails_to_db()
# for mail in excel_reader.read_mails_and_pwd(): # for mail in excel_reader.read_mails_and_pwd():
# MONGO_STORE_MANAGER.insert_email(mail) # MONGO_STORE_MANAGER.insert_email(mail)