can read links excel
This commit is contained in:
+13
-6
@@ -259,12 +259,19 @@ class MongoDbManager:
|
|||||||
def save_links_to_validate(self, link: str, mail_address: str):
|
def save_links_to_validate(self, link: str, mail_address: str):
|
||||||
collection_to_use = self.db[LINKS_TO_VALIDATE]
|
collection_to_use = self.db[LINKS_TO_VALIDATE]
|
||||||
updated_at = time.strftime("%H:%M:%S", time.localtime())
|
updated_at = time.strftime("%H:%M:%S", time.localtime())
|
||||||
collection_to_use.replace_one(filter={'_id': link, }, replacement={
|
if len(mail_address) > 0:
|
||||||
u'url': link,
|
collection_to_use.replace_one(filter={'_id': link, }, replacement={
|
||||||
u'email': mail_address,
|
u'url': link,
|
||||||
"updated_at": updated_at
|
u'email': mail_address,
|
||||||
},
|
"updated_at": updated_at
|
||||||
upsert=True)
|
},
|
||||||
|
upsert=True)
|
||||||
|
else:
|
||||||
|
collection_to_use.replace_one(filter={'_id': link, }, replacement={
|
||||||
|
u'url': link,
|
||||||
|
"updated_at": updated_at
|
||||||
|
},
|
||||||
|
upsert=True)
|
||||||
|
|
||||||
|
|
||||||
MONGO_STORE_MANAGER = MongoDbManager()
|
MONGO_STORE_MANAGER = MongoDbManager()
|
||||||
|
|||||||
@@ -13,17 +13,19 @@ class MailManager:
|
|||||||
filter(lambda filtered_item: filtered_item.url_validated is None or filtered_item.url_validated is False,
|
filter(lambda filtered_item: filtered_item.url_validated is None or filtered_item.url_validated is False,
|
||||||
successful_items))
|
successful_items))
|
||||||
for item in not_validated_items:
|
for item in not_validated_items:
|
||||||
if "@gmail" not in item.email and "@163" not in item.email:
|
if "@gmail" not in item.email and "@163" not in item.email and "@hotmail" not in item.email:
|
||||||
invalid_contacts.append(
|
invalid_contacts.append(
|
||||||
ContactPojo(phone_number=item.phone, passport_number=item.passport, last_name=item.lastName,
|
ContactPojo(phone_number=item.phone, passport_number=item.passport, last_name=item.lastName,
|
||||||
first_name=item.firstName, mail=item.email, ))
|
first_name=item.firstName, mail=item.email, ))
|
||||||
return invalid_contacts
|
return invalid_contacts
|
||||||
|
|
||||||
def get_valid_emails_for_day(self):
|
def get_valid_emails_for_day(self):
|
||||||
valid_contacts = []
|
valid_contacts = []
|
||||||
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
validated_items = list(
|
validated_items = list(
|
||||||
filter(lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True,
|
filter(
|
||||||
successful_items))
|
lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True,
|
||||||
|
successful_items))
|
||||||
for item in validated_items:
|
for item in validated_items:
|
||||||
if "@aol" in item.email:
|
if "@aol" in item.email:
|
||||||
valid_contacts.append(
|
valid_contacts.append(
|
||||||
@@ -56,4 +58,4 @@ class MailManager:
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
manager = MailManager()
|
manager = MailManager()
|
||||||
manager.write_invalid_contacts_to_excel(manager.get_invalid_emails_for_day())
|
manager.write_invalid_contacts_to_excel(manager.get_invalid_emails_for_day())
|
||||||
manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day())
|
# manager.write_invalid_contacts_to_excel(manager.get_valid_emails_for_day())
|
||||||
|
|||||||
@@ -199,9 +199,9 @@ class MailReader():
|
|||||||
|
|
||||||
|
|
||||||
def need_to_valid_url(url: str, successful_items) -> bool:
|
def need_to_valid_url(url: str, successful_items) -> bool:
|
||||||
# return True
|
return True
|
||||||
if len(successful_items) == 0:
|
# if len(successful_items) == 0:
|
||||||
return False
|
# return False
|
||||||
print("url is :" + url)
|
print("url is :" + url)
|
||||||
parts = url.split('/')
|
parts = url.split('/')
|
||||||
id = parts[5]
|
id = parts[5]
|
||||||
@@ -214,6 +214,8 @@ def need_to_valid_url(url: str, successful_items) -> bool:
|
|||||||
return not item.url_validated
|
return not item.url_validated
|
||||||
else:
|
else:
|
||||||
# if url_validated is None
|
# if url_validated is None
|
||||||
|
if item.url_validated is not None:
|
||||||
|
return not item.url_validated
|
||||||
return True
|
return True
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ def read_pinyin_list_from_file() -> list:
|
|||||||
print("Line{}: {}".format(count, line.strip()))
|
print("Line{}: {}".format(count, line.strip()))
|
||||||
name_list.append(line.strip())
|
name_list.append(line.strip())
|
||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
|
|
||||||
def read_pinyin_first_name_from_file() -> list:
|
def read_pinyin_first_name_from_file() -> list:
|
||||||
file2 = open('first_name_noDuplicates.txt', 'r')
|
file2 = open('first_name_noDuplicates.txt', 'r')
|
||||||
lines = file2.readlines()
|
lines = file2.readlines()
|
||||||
@@ -49,6 +51,7 @@ def read_pinyin_first_name_from_file() -> list:
|
|||||||
name_list.append(line.strip())
|
name_list.append(line.strip())
|
||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
|
|
||||||
def read_pinyin_last_name_from_file() -> list:
|
def read_pinyin_last_name_from_file() -> list:
|
||||||
file2 = open('last_name_noDuplicates.txt', 'r')
|
file2 = open('last_name_noDuplicates.txt', 'r')
|
||||||
lines = file2.readlines()
|
lines = file2.readlines()
|
||||||
@@ -76,8 +79,11 @@ def generate_new_list_from_old_name_list():
|
|||||||
f.write(str(new_name))
|
f.write(str(new_name))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
||||||
def has_numbers(inputString):
|
def has_numbers(inputString):
|
||||||
return any(char.isdigit() for char in inputString)
|
return any(char.isdigit() for char in inputString)
|
||||||
|
|
||||||
|
|
||||||
def check_name(word_to_test, pinyin_name_list):
|
def check_name(word_to_test, pinyin_name_list):
|
||||||
if "_" in word_to_test or "." in word_to_test:
|
if "_" in word_to_test or "." in word_to_test:
|
||||||
return None
|
return None
|
||||||
@@ -106,9 +112,13 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
|||||||
print("generate for " + mail_address)
|
print("generate for " + mail_address)
|
||||||
all_combins = get_ordered_combins(mail_address)
|
all_combins = get_ordered_combins(mail_address)
|
||||||
all_combins.sort(key=len, reverse=True)
|
all_combins.sort(key=len, reverse=True)
|
||||||
|
no_duplicated_list = []
|
||||||
|
for word in all_combins:
|
||||||
|
if word not in no_duplicated_list:
|
||||||
|
no_duplicated_list.append(word)
|
||||||
print(all_combins)
|
print(all_combins)
|
||||||
|
|
||||||
for i in all_combins:
|
for i in no_duplicated_list:
|
||||||
word_to_test = "".join(i)
|
word_to_test = "".join(i)
|
||||||
print("word to test is " + word_to_test)
|
print("word to test is " + word_to_test)
|
||||||
# if len(word_to_test) >= 11:
|
# if len(word_to_test) >= 11:
|
||||||
@@ -139,7 +149,7 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
|||||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
# if found_name is not None:
|
# if found_name is not None:
|
||||||
# return found_name
|
# return found_name
|
||||||
if len(word_to_test) >= 4:
|
if len(word_to_test) >= 1:
|
||||||
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
if found_name is not None:
|
if found_name is not None:
|
||||||
return found_name
|
return found_name
|
||||||
@@ -191,13 +201,13 @@ def generate_contact_from_mail_list():
|
|||||||
db_manager = MongoDbManager()
|
db_manager = MongoDbManager()
|
||||||
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
# mail_list = db_manager.get_destination_emails()[6000:7000]
|
||||||
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
# mail_list = db_manager.get_destination_emails()[3001:3200]
|
||||||
mail_list = db_manager.get_destination_emails()[7570:7590]
|
# mail_list = db_manager.get_destination_emails()[7570:7590]
|
||||||
# excel_reader = ExcelHelper()
|
excel_reader = ExcelHelper()
|
||||||
# mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/yahoo_list.xlsx")
|
mail_list = excel_reader.read_mails_and_pwd(file_name="/Users/lpan/Desktop/mails/163_list_60.xlsx")
|
||||||
generate_contacts = []
|
generate_contacts = []
|
||||||
pinyin_name_list = read_pinyin_list_from_file()
|
pinyin_name_list = read_pinyin_list_from_file()
|
||||||
random.shuffle(pinyin_name_list)
|
random.shuffle(pinyin_name_list)
|
||||||
with ThreadPoolExecutor(max_workers=500) as executor:
|
with ThreadPoolExecutor(max_workers=len(mail_list)) as executor:
|
||||||
for mail in mail_list:
|
for mail in mail_list:
|
||||||
executor.submit(find_contact, generate_contacts, mail, pinyin_name_list)
|
executor.submit(find_contact, generate_contacts, mail, pinyin_name_list)
|
||||||
# for mail in mail_list:
|
# for mail in mail_list:
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ class ReserveResultPojo:
|
|||||||
url = ""
|
url = ""
|
||||||
if 'id' in source:
|
if 'id' in source:
|
||||||
id = source['id']
|
id = source['id']
|
||||||
|
if '_id' in source:
|
||||||
|
id = source['_id']
|
||||||
else:
|
else:
|
||||||
id = ""
|
id = ""
|
||||||
if 'email' in source:
|
if 'email' in source:
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import random
|
|||||||
import string
|
import string
|
||||||
|
|
||||||
import pandas as pandas
|
import pandas as pandas
|
||||||
|
import validators as validators
|
||||||
import xlsxwriter
|
import xlsxwriter
|
||||||
|
|
||||||
from src.config import CONTACT_LIST_FILE
|
from src.config import CONTACT_LIST_FILE
|
||||||
@@ -14,16 +15,24 @@ from src.utils.generate_random_passport_id import get_random_passport_id_number
|
|||||||
phone_number_prefix = ['6']
|
phone_number_prefix = ['6']
|
||||||
|
|
||||||
|
|
||||||
|
def read_links_to_click(file_path):
|
||||||
|
links_info_in_json = pandas.read_excel(file_path).to_json(orient='records')
|
||||||
|
# print(links_info_in_json)
|
||||||
|
for item in json.loads(links_info_in_json):
|
||||||
|
link = item['link']
|
||||||
|
if validators.url(link):
|
||||||
|
print(link)
|
||||||
|
MONGO_STORE_MANAGER.save_links_to_validate(link, "")
|
||||||
|
|
||||||
|
else:
|
||||||
|
print("error on link " + link)
|
||||||
|
|
||||||
|
|
||||||
class ExcelHelper:
|
class ExcelHelper:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._df = pandas.Series()
|
self._df = pandas.Series()
|
||||||
|
|
||||||
def write_to_exel(self, file_name, data_list: list):
|
|
||||||
new_df = pandas.Series(data_list)
|
|
||||||
self._df = pandas.concat([self._df, new_df])
|
|
||||||
self._df.to_excel(file_name)
|
|
||||||
|
|
||||||
def read_user_agens(self) -> list:
|
def read_user_agens(self) -> list:
|
||||||
user_agent_in_json = pandas.read_excel(
|
user_agent_in_json = pandas.read_excel(
|
||||||
"/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json(
|
"/Users/lpan/Documents/workspace/appointment_tool/docs/mobile_user_agent_list.xlsx").to_json(
|
||||||
@@ -231,8 +240,9 @@ if __name__ == '__main__':
|
|||||||
# contacts = excel_reader.read_names("/Users/lpan/Downloads/gmail_10.xlsx")
|
# contacts = excel_reader.read_names("/Users/lpan/Downloads/gmail_10.xlsx")
|
||||||
# print(contacts)
|
# print(contacts)
|
||||||
# write_new_contacts_to_excel(valid_contacts=contacts)
|
# write_new_contacts_to_excel(valid_contacts=contacts)
|
||||||
excel_reader = ExcelHelper()
|
# excel_reader = ExcelHelper()
|
||||||
excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx")
|
# excel_reader.check_contact_list("/Users/lpan/Desktop/contact_email_valid.xlsx")
|
||||||
|
read_links_to_click("/Users/lpan/Downloads/链接.xlsx")
|
||||||
# save_mails_to_db()
|
# save_mails_to_db()
|
||||||
# for mail in excel_reader.read_mails_and_pwd():
|
# for mail in excel_reader.read_mails_and_pwd():
|
||||||
# MONGO_STORE_MANAGER.insert_email(mail)
|
# MONGO_STORE_MANAGER.insert_email(mail)
|
||||||
|
|||||||
Reference in New Issue
Block a user