Can read links from Excel
This commit is contained in:
+13
-6
@@ -259,12 +259,19 @@ class MongoDbManager:
|
||||
def save_links_to_validate(self, link: str, mail_address: str):
    """Upsert one link into the ``LINKS_TO_VALIDATE`` collection.

    The document is matched on ``_id == link`` and replaced as a whole
    (inserted when it does not exist yet).

    :param link: URL to store; used as the ``_id`` filter and as the
        ``url`` field.
    :param mail_address: address associated with the link. The ``email``
        field is written only when this is non-empty.
    """
    collection_to_use = self.db[LINKS_TO_VALIDATE]

    # Fields are added in the order url, email, updated_at so the stored
    # document keeps the same layout as before.
    replacement = {u'url': link}
    # Truthiness also covers None, which len() would reject with a TypeError.
    if mail_address:
        replacement[u'email'] = mail_address
    # NOTE(review): only the wall-clock time is recorded, not the date --
    # confirm this is what readers of "updated_at" expect.
    replacement["updated_at"] = time.strftime("%H:%M:%S", time.localtime())

    # A single upsert is enough: the previous code wrote the document once
    # unconditionally and then overwrote it again in the if/else below it.
    collection_to_use.replace_one(
        filter={'_id': link},
        replacement=replacement,
        upsert=True,
    )
|
||||
|
||||
|
||||
# MongoDbManager instance bound to a constant-style name; other code reaches
# the store through it (e.g. MONGO_STORE_MANAGER.save_links_to_validate(...)).
# NOTE(review): presumably created once at import time -- confirm.
MONGO_STORE_MANAGER = MongoDbManager()
|
||||
|
||||
@@ -13,17 +13,19 @@ class MailManager:
|
||||
filter(lambda filtered_item: filtered_item.url_validated is None or filtered_item.url_validated is False,
|
||||
successful_items))
|
||||
for item in not_validated_items:
|
||||
if "@gmail" not in item.email and "@163" not in item.email:
|
||||
if "@gmail" not in item.email and "@163" not in item.email and "@hotmail" not in item.email:
|
||||
invalid_contacts.append(
|
||||
ContactPojo(phone_number=item.phone, passport_number=item.passport, last_name=item.lastName,
|
||||
first_name=item.firstName, mail=item.email, ))
|
||||
return invalid_contacts
|
||||
|
||||
def get_valid_emails_for_day(self):
|
||||
valid_contacts = []
|
||||
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||
validated_items = list(
|
||||
filter(lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True,
|
||||
successful_items))
|
||||
filter(
|
||||
lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True,
|
||||
successful_items))
|
||||
for item in validated_items:
|
||||
if "@aol" in item.email:
|
||||
valid_contacts.append(
|
||||
@@ -56,4 +58,4 @@ class MailManager:
|
||||
if __name__ == '__main__':
|
||||
manager = MailManager()
|
||||
manager.write_invalid_contacts_to_excel(manager.get_invalid_emails_for_day())
|
||||
manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day())
|
||||
# manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day())
|
||||
|
||||
@@ -199,9 +199,9 @@ class MailReader():
|
||||
|
||||
|
||||
def need_to_valid_url(url: str, successful_items) -> bool:
|
||||
# return True
|
||||
if len(successful_items) == 0:
|
||||
return False
|
||||
return True
|
||||
# if len(successful_items) == 0:
|
||||
# return False
|
||||
print("url is :" + url)
|
||||
parts = url.split('/')
|
||||
id = parts[5]
|
||||
@@ -214,6 +214,8 @@ def need_to_valid_url(url: str, successful_items) -> bool:
|
||||
return not item.url_validated
|
||||
else:
|
||||
# if url_validated is None
|
||||
if item.url_validated is not None:
|
||||
return not item.url_validated
|
||||
return True
|
||||
return True
|
||||
else:
|
||||
|
||||
@@ -38,6 +38,8 @@ def read_pinyin_list_from_file() -> list:
|
||||
print("Line{}: {}".format(count, line.strip()))
|
||||
name_list.append(line.strip())
|
||||
return name_list
|
||||
|
||||
|
||||
def read_pinyin_first_name_from_file() -> list:
|
||||
file2 = open('first_name_noDuplicates.txt', 'r')
|
||||
lines = file2.readlines()
|
||||
@@ -49,6 +51,7 @@ def read_pinyin_first_name_from_file() -> list:
|
||||
name_list.append(line.strip())
|
||||
return name_list
|
||||
|
||||
|
||||
def read_pinyin_last_name_from_file() -> list:
|
||||
file2 = open('last_name_noDuplicates.txt', 'r')
|
||||
lines = file2.readlines()
|
||||
@@ -76,8 +79,11 @@ def generate_new_list_from_old_name_list():
|
||||
f.write(str(new_name))
|
||||
f.close()
|
||||
|
||||
|
||||
def has_numbers(inputString):
    """Return True when at least one character of ``inputString`` is a digit."""
    for char in inputString:
        if char.isdigit():
            return True
    return False
|
||||
|
||||
|
||||
def check_name(word_to_test, pinyin_name_list):
|
||||
if "_" in word_to_test or "." in word_to_test:
|
||||
return None
|
||||
@@ -106,9 +112,13 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
||||
print("generate for " + mail_address)
|
||||
all_combins = get_ordered_combins(mail_address)
|
||||
all_combins.sort(key=len, reverse=True)
|
||||
no_duplicated_list = []
|
||||
for word in all_combins:
|
||||
if word not in no_duplicated_list:
|
||||
no_duplicated_list.append(word)
|
||||
print(all_combins)
|
||||
|
||||
for i in all_combins:
|
||||
for i in no_duplicated_list:
|
||||
word_to_test = "".join(i)
|
||||
print("word to test is " + word_to_test)
|
||||
# if len(word_to_test) >= 11:
|
||||
@@ -139,7 +149,7 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||
# if found_name is not None:
|
||||
# return found_name
|
||||
if len(word_to_test) >= 4:
|
||||
if len(word_to_test) >= 1:
|
||||
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||
if found_name is not None:
|
||||
return found_name
|
||||
@@ -191,13 +201,13 @@ def generate_contact_from_mail_list():
|
||||
db_manager = MongoDbManager()
|
||||
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
||||
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
||||
mail_list = db_manager.get_destination_emails()[7570:7590]
|
||||
# excel_reader = ExcelHelper()
|
||||
# mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx")
|
||||
# mail_list = db_manager.get_destination_emails()[7570:7590]
|
||||
excel_reader = ExcelHelper()
|
||||
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/163_list_60.xlsx")
|
||||
generate_contacts = []
|
||||
pinyin_name_list = read_pinyin_list_from_file()
|
||||
random.shuffle(pinyin_name_list)
|
||||
with ThreadPoolExecutor(max_workers=500) as executor:
|
||||
with ThreadPoolExecutor(max_workers=len(mail_list)) as executor:
|
||||
for mail in mail_list:
|
||||
executor.submit(find_contact, generate_contacts, mail, pinyin_name_list)
|
||||
# for mail in mail_list:
|
||||
|
||||
@@ -53,6 +53,8 @@ class ReserveResultPojo:
|
||||
url = ""
|
||||
if 'id' in source:
|
||||
id = source['id']
|
||||
if '_id' in source:
|
||||
id = source['_id']
|
||||
else:
|
||||
id = ""
|
||||
if 'email' in source:
|
||||
|
||||
@@ -3,6 +3,7 @@ import random
|
||||
import string
|
||||
|
||||
import pandas as pandas
|
||||
import validators as validators
|
||||
import xlsxwriter
|
||||
|
||||
from src.config import CONTACT_LIST_FILE
|
||||
@@ -14,16 +15,24 @@ from src.utils.generate_random_passport_id import get_random_passport_id_number
|
||||
phone_number_prefix = ['6']
|
||||
|
||||
|
||||
def read_links_to_click(file_path):
    """Read links from an Excel sheet and store the valid ones for validation.

    Every row's ``link`` cell is checked with ``validators.url``. Valid links
    are printed and saved through
    ``MONGO_STORE_MANAGER.save_links_to_validate`` with an empty mail
    address; anything else is reported on stdout and skipped.

    :param file_path: path of the Excel file to read; it must contain a
        ``link`` column.
    :raises KeyError: if a row has no ``link`` field.
    """
    # Round-trip through JSON so empty cells (NaN) come back as None.
    links_info_in_json = pandas.read_excel(file_path).to_json(orient='records')
    for item in json.loads(links_info_in_json):
        link = item['link']
        # Empty or numeric cells are not strings: keep them away from the URL
        # validator and report them instead of crashing the whole import.
        if isinstance(link, str) and validators.url(link):
            print(link)
            MONGO_STORE_MANAGER.save_links_to_validate(link, "")
        else:
            # format() instead of "+" so a None/number cell cannot raise here.
            print("error on link {}".format(link))
|
||||
|
||||
|
||||
class ExcelHelper:
|
||||
|
||||
def __init__(self):
|
||||
self._df = pandas.Series()
|
||||
|
||||
def write_to_exel(self, file_name, data_list: list):
    """Append ``data_list`` to the accumulated series and write it to Excel.

    :param file_name: destination passed to ``Series.to_excel``.
    :param data_list: values appended after the ones from earlier calls.
    """
    # The instance keeps growing, so every call rewrites the file with
    # everything collected so far plus the new values.
    self._df = pandas.concat([self._df, pandas.Series(data_list)])
    self._df.to_excel(file_name)
|
||||
|
||||
def read_user_agens(self) -> list:
|
||||
user_agent_in_json = pandas.read_excel(
|
||||
"/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json(
|
||||
@@ -231,8 +240,9 @@ if __name__ == '__main__':
|
||||
# contacts = excel_reader.read_names("/Users/lpan/Downloads/gmail_10.xlsx")
|
||||
# print(contacts)
|
||||
# write_new_contacts_to_excel(valid_contacts=contacts)
|
||||
excel_reader = ExcelHelper()
|
||||
excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx")
|
||||
# excel_reader = ExcelHelper()
|
||||
# excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx")
|
||||
read_links_to_click("/Users/lpan/Downloads/链接.xlsx")
|
||||
# save_mails_to_db()
|
||||
# for mail in excel_reader.read_mails_and_pwd():
|
||||
# MONGO_STORE_MANAGER.insert_email(mail)
|
||||
|
||||
Reference in New Issue
Block a user