diff --git a/read_confirmation_emails.py b/read_confirmation_emails.py index d35fe37..44cd6bb 100755 --- a/read_confirmation_emails.py +++ b/read_confirmation_emails.py @@ -4,8 +4,10 @@ from src.pojo import ReserveResultPojo def create_message_from_item(item: ReserveResultPojo): - return f"Phone: {item.phone}\n" \ - f"URL: {item.url}\n" \ + return f"约会信息\n" \ + f"电话: {item.phone}\n" \ + f"链接: {item.url}\n" \ + f"护照: {item.passport}\n" \ f"Email: {item.mail}\n" \ f"First Name: {item.first_name}\n" \ f"Last Name: {item.last_name}\n" diff --git a/src/check_results.py b/src/check_results.py deleted file mode 100644 index cfca543..0000000 --- a/src/check_results.py +++ /dev/null @@ -1,146 +0,0 @@ -import datetime -import logging -import random -import threading -from concurrent.futures import ThreadPoolExecutor -from typing import Union - -from playwright.sync_api import sync_playwright - -from src.db.mirgration.migration_tools import migre_accepted_appointment -from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.definitions import LOG_SUBJECT_EVENT, TYPE_EVENT_CHECK_RESULTS -from src.logs.LogSender import LogSender -from src.notification.AcceptedResultPojo import get_accepted_result_from -from src.notification.mailer import Mailer -from src.pojo.ReserveResultPojo import ReserveResultPojo -from src.pojo.ResultEnum import ResultEnum -from src.proxy.proxy_type import ProxyType -from src import definitions, params -from src.workers.TlsPlaywright import TlsPlaywright - -SORRY_SENTENCE_FR = "nous sommes sincèrement désolés de n'avoir pu vous satisfaire cette fois-ci" -SORRY_SENTENCE_EN = "we are extremely sorry that we were not able to fulfill" -NOT_AVAILABLE_CONTENT = "For more than 130 years, our House has offered its full expertise to satisfy" -PENDING_SENTENCE = "Ce soir, entre 20:00 et 20:30, vous obtiendrez une réponse par e-mail." -URL_TO_VALID_SENTENCE = "Nous avons envoyé un lien par e-mail." -PENDING_SENTENCE_EN = "This evening between 20:00 and 20:30 you will receive a response by email." -CAPTCHA_URL = "https://geo.captcha-delivery.com" -# URLs to ignore during checking results -BLANK_URL = "about:blank" -WELCOME_URL = "https://rendezvousparis.hermes.com/client/welcome" - -mailer = Mailer() -oracle_log_sender = LogSender() - - -class ResultChecker: - tls = TlsPlaywright() - - def __init__(self): - self.logger = logging.getLogger("Worker") - - def load_page(self, playwright, proxy, url, device, headless) -> Union[str, None]: - try: - self.browser = playwright.webkit.launch(headless=headless, timeout=90000, proxy=proxy) - pixel_2 = self.tls.playwright.devices[device] - context = self.browser.new_context(**pixel_2, locale='fr-FR') - self.page = context.new_page() - # hide webdriver information - self.page.add_init_script("""() => { - Object.defineProperty(navigator,'webdriver',{get: () => undefined}); - Object.defineProperty(navigator, 'platform', { - get: () => { - return "iPhone"; - }}); - } - """) - self.page.goto(url, timeout=90000) - return self.page.content() - except Exception as error: - print(error) - self.logger.info("will close browser") - self.browser.close() - return None - - def run(self, reserve_pojo: ReserveResultPojo, firestore_collection, headless=False, need_send_email=False): - url = reserve_pojo.url - print("url is " + url) - content = None - proxy = params.get_proxy(ProxyType.OXYLABS) - device = random.choice(params.DEVICES) - self.logger.info("模拟设备: " + device) - while content is None: - content = self.load_page(self.tls.playwright, proxy, url, device, headless) - proxy = params.get_proxy(ProxyType.OXYLABS) - print(content) - print("Stopped worker in ", threading.current_thread().name) - if SORRY_SENTENCE_FR in content: - print("status is REFUSED") - status = ResultEnum.REFUSED - elif SORRY_SENTENCE_EN in content: - print("status is REFUSED") - status = ResultEnum.REFUSED - elif PENDING_SENTENCE in content: - print("status is PENDING") - status = ResultEnum.PENDING - elif URL_TO_VALID_SENTENCE in content: - print("status is REFUSED") - status = ResultEnum.REFUSED - elif PENDING_SENTENCE_EN in content: - print("status is PENDING") - status = ResultEnum.PENDING - elif NOT_AVAILABLE_CONTENT in content: - print("status is REFUSED") - status = ResultEnum.REFUSED - elif CAPTCHA_URL in content: - print("status is BLOCKED") - self.browser.close() - else: - print("status is ACCEPTED") - status = ResultEnum.ACCEPTED - # send email - try: - mailer.send_email(get_accepted_result_from(reserve_pojo), to_all=need_send_email) - except Exception as err: - print(err) - reserve_pojo.accepted = status - MONGO_STORE_MANAGER.update_reserve_result(reserve_pojo.id, status) - self.browser.close() - - -def check_results(headless=False): - # get the list - oracle_log_sender.send_log(msg="开始检查约会结果", subject=LOG_SUBJECT_EVENT, type=TYPE_EVENT_CHECK_RESULTS) - db_manager = definitions.firebase_store_manager - firestore_collection = db_manager.get_all_successful_items() - reserve_list = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - print("size is " + str(len(reserve_list))) - start_check(reserve_list, firestore_collection, headless, need_send_email=False) - reserve_list = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - start_check(reserve_list, firestore_collection, headless, need_send_email=True) - # copy the accepted info to the accepted collection - migre_accepted_appointment(str(datetime.date.today())) - - -def start_check(reserve_list, firestore_collection, headless: bool, need_send_email: bool): - count = 0 - with ThreadPoolExecutor(max_workers=20) as executor: - for reserve in reserve_list: - count = count + 1 - if reserve.accepted is None or ResultEnum.ACCEPTED.value == reserve.accepted: - print("will check result") - if "hotmail" in reserve.email: - if reserve.url != BLANK_URL: - if reserve.url != WELCOME_URL: - executor.submit(ResultChecker().run, reserve, firestore_collection, headless, - need_send_email) - else: - print("status is " + reserve.accepted) - - print(count) - - -# need to start at 21h00 -if __name__ == '__main__': - check_results() diff --git a/src/db/mongo_manager.py b/src/db/mongo_manager.py index 3b60077..30422cf 100755 --- a/src/db/mongo_manager.py +++ b/src/db/mongo_manager.py @@ -1,6 +1,7 @@ import datetime import logging import time +from typing import Union from pymongo import MongoClient @@ -24,6 +25,7 @@ DESTINATION_EMAIL_LIST = "DESTINATION_EMAIL_LIST" LINKS_TO_VALIDATE = "LINKS_TO_VALIDATE" INVALID_EMAIL_LIST = "INVALID_EMAIL_LIST" ADRESSE_LIST = "address" +CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP" class MongoDbManager: @@ -87,6 +89,17 @@ class MongoDbManager: except Exception as Error: self.logger.info(Error) + def upload_contact_list_to_collection(self, contact_list: list, collection_name): + + try: + collection_to_use = self.db[collection_name] + for contact in contact_list: + # collection_to_use.insert_one(contact.to_firestore_dict()) + collection_to_use.replace_one(filter={'_id': contact.mail, }, replacement=contact.to_firestore_dict(), + upsert=True) + except Exception as Error: + self.logger.info(Error) + def get_all_accepted_appointments(self) -> list: collection_name = ACCEPTED_APPOINTMENT_LIST appointment_list_contacts = [] @@ -306,10 +319,30 @@ class MongoDbManager: _all_contact_list.append(ContactPojo.from_firestore_dict(document)) return _all_contact_list - def save_links_to_validate(self, link: str, mail_address: str, _all_contact_list: list): + def get_all_contact_serial_list(self) -> list: + result_list = [] + cursor = self.db[CONTACT_LIST_SERIAL_MAP] + for document in cursor.find(): + result_list.append(ContactPojo.from_firestore_dict(document)) + return result_list + + def save_links_to_validate(self, link: str, mail_address: str, _all_contact_list: list, + _item: Union[ReserveResultPojo, None], contact_serial_map_list): collection_to_use = self.db[LINKS_TO_VALIDATE] updated_at = time.strftime("%H:%M:%S", time.localtime()) _ip_country = "FR" + _model = "" + serial = "" + if _item: + _model = _item.source_from + serial = _item.serial + if len(serial) == 0: + for contact in contact_serial_map_list: + if contact.mail == mail_address: + serial = contact.serial + _model = contact.model + break + # find ip_country info for _contact in _all_contact_list: if _contact.mail == mail_address: @@ -319,6 +352,8 @@ class MongoDbManager: collection_to_use.replace_one(filter={'_id': mail_address, }, replacement={ u'url': link, u'email': mail_address, + u'model': _model, + u'serial': serial, u'ip_country': _ip_country, "updated_at": updated_at }, @@ -326,6 +361,8 @@ class MongoDbManager: else: collection_to_use.replace_one(filter={'_id': link, }, replacement={ u'url': link, + u'model': _model, + u'serial': serial, u'ip_country': _ip_country, "updated_at": updated_at }, diff --git a/src/discord_helper.py b/src/discord_helper.py index f42876c..56780f3 100644 --- a/src/discord_helper.py +++ b/src/discord_helper.py @@ -1,6 +1,6 @@ import requests -DISCORD_SERVER_URL = "http://127.0.0.1:9000/discord/send" +DISCORD_SERVER_URL = "http://appointment.lpaconsulting.fr:9000/discord/send" def send_message(message: str): diff --git a/src/mail/mail_reader.py b/src/mail/mail_reader.py index a566a00..caf5325 100755 --- a/src/mail/mail_reader.py +++ b/src/mail/mail_reader.py @@ -181,10 +181,7 @@ class MailReader(): return mail_messages -def need_to_valid_url(url: str, successful_items) -> bool: - # return True - # if len(successful_items) == 0: - # return False +def need_to_valid_url(url: str, email, successful_items): print("url is :" + url) parts = url.split('/') id = parts[5] @@ -192,16 +189,19 @@ def need_to_valid_url(url: str, successful_items) -> bool: for item in successful_items: if item.id == id: if item.url_validated is not None: - return not item.url_validated + return item, not item.url_validated else: # if url_validated is None if item.url_validated is not None: - return not item.url_validated - return True - return True + return item, not item.url_validated + return item, True + for item in successful_items: + if item.mail == email: + return item, True + return None, True else: print("id not valid:{}".format(id)) - return False + return None, False def need_to_check_email(mail: str, successful_items) -> bool: @@ -256,20 +256,16 @@ def read_mails(): # get ip_country info _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() _all_contact_list = MONGO_STORE_MANAGER.get_all_contacts_to_book() + contact_serial_map_list = MONGO_STORE_MANAGER.get_all_contact_serial_list() with ThreadPoolExecutor(max_workers=10) as executor: for mail in mails_messages: - # if mail.isImapClient: - # match = re.search(PART_VALIDATION_URL_REGEX, mail.body.replace("\n", "")) - # else: match = re.search(VALIDATION_URL_REGEX, mail.body) if match: - # url_to_validate = match.group(0) - # if mail.isImapClient: - # url = "https://rendezvousparis.hermes.com/" + url_to_validate.replace("3D", "") - # else: url = match.group(0) - if need_to_valid_url(url, _refreshed_successful_items): - MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, _all_contact_list) + _item, is_need_to = need_to_valid_url(url, mail.to_address, _refreshed_successful_items) + if is_need_to: + MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, _all_contact_list, _item, + contact_serial_map_list) # url_validator = LinkValidator(url) print("need to validate url: " + url) # executor.submit(url_validator.start_page, params.get_proxy(ProxyType.OXYLABS), False) diff --git a/src/mail/mail_reader_all_contacts.py b/src/mail/mail_reader_all_contacts.py index 053db92..c0a17da 100755 --- a/src/mail/mail_reader_all_contacts.py +++ b/src/mail/mail_reader_all_contacts.py @@ -12,6 +12,7 @@ from imapclient import IMAPClient from src.db.mongo_manager import MONGO_STORE_MANAGER from src.logs.AppLogging import init_logger from src.mail.mail_constants import DOMAIN_HOTMAIL, create_imap +from src.mail.mail_reader import need_to_valid_url from src.pojo.mail.mail_pojo import MailPojo from src.utils.timeutiles import is_time_between @@ -200,25 +201,25 @@ class MailReader(): return mail_messages -def need_to_valid_url(url: str, successful_items) -> bool: - # return True - # if len(successful_items) == 0: - # return False - print("url is :" + url) - parts = url.split('/') - id = parts[5] - if len(id) == 6: - for item in successful_items: - if item.id == id: - if item.url_validated is not None: - return not item.url_validated - else: - # if url_validated is None - return True - return True - else: - print("id not valid:{}".format(id)) - return False +# def need_to_valid_url(url: str, successful_items) -> bool: +# # return True +# # if len(successful_items) == 0: +# # return False +# print("url is :" + url) +# parts = url.split('/') +# id = parts[5] +# if len(id) == 6: +# for item in successful_items: +# if item.id == id: +# if item.url_validated is not None: +# return not item.url_validated +# else: +# # if url_validated is None +# return True +# return True +# else: +# print("id not valid:{}".format(id)) +# return False def need_to_check_email(mail: str, successful_items) -> bool: @@ -281,6 +282,8 @@ def read_all_mails(contact_to_book_list=None): executor.submit(mail_reader.read_emails, mails_messages) # 在读邮件时候,可能会有其他的约会提交或者约会的链接确认,所以需要刷新一下成功的列表 _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + contact_serial_map_list = MONGO_STORE_MANAGER.get_all_contact_serial_list() + with ThreadPoolExecutor(max_workers=10) as executor: for mail in mails_messages: match = re.search(VALIDATION_URL_REGEX, mail.body) @@ -290,10 +293,12 @@ def read_all_mails(contact_to_book_list=None): # url = "https://rendezvousparis.hermes.com/" + url_to_validate.replace("3D", "") # else: url = match.group(0) - if need_to_valid_url(url, _refreshed_successful_items): + _item, is_need_to = need_to_valid_url(url, mail.to_address, _refreshed_successful_items) + if is_need_to: print("need to validate url: " + url) MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, - _all_contact_list=contact_to_book_list) + _all_contact_list=contact_to_book_list, _item=_item, + contact_serial_map_list=contact_serial_map_list) else: print("do not need to click url --> {}".format(mail.mail_address)) diff --git a/src/person_name/contact_manager.py b/src/person_name/contact_manager.py index 31029be..1ea76f9 100755 --- a/src/person_name/contact_manager.py +++ b/src/person_name/contact_manager.py @@ -3,13 +3,15 @@ import random from pathlib import Path import xlsxwriter -# from src.person_name.cython_extract_methods import filter_already_validated_contacts, read_pinyin_list_from_file from src.db.mongo_manager import MONGO_STORE_MANAGER from src.pojo.contact_pojo import ContactPojo from src.utils.contacts.generate_random_passport_id import get_random_passport_id_number from src.utils.excel_reader import read_contacts, fr_phone_number_prefix, get_random_fr_phone_numbers, ExcelHelper +# from src.person_name.cython_extract_methods import filter_already_validated_contacts, read_pinyin_list_from_file + + DEFAULT_SERIAL = "47e7e36b" @@ -86,7 +88,7 @@ def write_new_contacts_to_excel(valid_contacts: list, file_name=str(datetime.dat def generate_valid_contact_list_for_day(segment_number=1): - _collection_name = "2024-09-23" + _collection_name = "2024-12-13" _valid_contact_list = MONGO_STORE_MANAGER.get_all_successful_items_for_one_day(_collection_name) _all_contacts = MONGO_STORE_MANAGER.get_all_contacts_to_book() _contact_to_save = [] @@ -101,8 +103,8 @@ def generate_valid_contact_list_for_day(segment_number=1): _contact.passport = _true_contact.passport _contact.first_name = _true_contact.first_name - if len(_contact.serial) == 0: - _contact.serial = DEFAULT_SERIAL + # if len(_contact.serial) == 0: + # _contact.serial = DEFAULT_SERIAL if _contact.url_validated: if _contact.last_name is not None and len(_contact.last_name) > 0: _need_to_save = True @@ -132,6 +134,13 @@ def merge_contact_list_files(file_list: list, final_file_name="merged_contact_li _all_contact_list.extend(read_contacts(file)) for _con in _all_contact_list: _con.store = "random" + _contact_serial_map = MONGO_STORE_MANAGER.get_all_contact_serial_list() + for contact in _all_contact_list: + if contact.serial == DEFAULT_SERIAL: + for _contact_serial in _contact_serial_map: + if _contact_serial.mail == contact.mail: + contact.serial = _contact_serial.serial + break print(len(_all_contact_list)) _list_without_duplicate = list(set(_all_contact_list)) print(len(_list_without_duplicate)) @@ -211,8 +220,11 @@ if __name__ == '__main__': # generate_valid_contact_list_for_day(segment_number=2) # get_old_validated_contact_list() # print("end at {}".format(datetime.datetime.now())) - generate_all_contact_list() - # merge_contact_list_files( - # ["/Users/lpan/Desktop/contact_list_all_old_not_used_contact.xlsx", - # "/Users/lpan/Desktop/contact_list_2024-06-26.xlsx"]) + # generate_all_contact_list() + merge_contact_list_files( + ["/Users/lpan/Desktop/contact_list_2024-11-04.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-07.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-08.xlsx", + "/Users/lpan/Desktop/contact_list_2024-11-06.xlsx" + ]) # fix_phone_number_format("/Users/lpan/Desktop/15_05_to_test.xlsx") diff --git a/src/pojo/ReserveResultPojo.py b/src/pojo/ReserveResultPojo.py index 602f7bc..a9aa475 100644 --- a/src/pojo/ReserveResultPojo.py +++ b/src/pojo/ReserveResultPojo.py @@ -39,6 +39,7 @@ class ReserveResultPojo: validated_at = None ip_address = None ua = "" + model = "" serial = "" def __hash__(self): @@ -109,9 +110,10 @@ class ReserveResultPojo: if 'message' in source: message = source['message'] result.message = message - if 'source' in source: - source_from = source['source'] + if 'source_from' in source: + source_from = source['source_from'] result.source_from = source_from + result.model = source_from if 'sim_position' in source: sim_position = source['sim_position'] result.sim_position = sim_position diff --git a/src/pojo/contact_pojo.py b/src/pojo/contact_pojo.py index fd11341..414513b 100644 --- a/src/pojo/contact_pojo.py +++ b/src/pojo/contact_pojo.py @@ -16,6 +16,7 @@ class ContactPojo: isp: str = None ua: str = "" serial: str = "" + model: str = "" def __repr__(self): return "phone:{}, passport:{}, last_name:{}, first_name:{}, mail:{}, store:{}, ip_country:{},isp:{}".format( @@ -84,5 +85,8 @@ class ContactPojo: if source.get('serial'): serial = source['serial'] result.serial = serial + if source.get('source_from'): + model = source['source_from'] + result.model = model result.ua = ua return result diff --git a/src/utils/contacts/check_all_collections.py b/src/utils/contacts/check_all_collections.py index e801bfc..42a6fc5 100644 --- a/src/utils/contacts/check_all_collections.py +++ b/src/utils/contacts/check_all_collections.py @@ -1,6 +1,8 @@ +from pathlib import Path + from src.db.mongo_manager import MONGO_STORE_MANAGER -from src.person_name.contact_manager import write_new_contacts_to_excel from src.pojo.contact_pojo import ContactPojo +from src.utils.excel_reader import read_contacts DOMAIN_TO_INCLUDE = ["aol.com", "yahoo.com", "gmx.de", "gmx.net", "inbox.lv"] @@ -32,11 +34,11 @@ def get_all_not_used_mails(): return not_used_emails -def get_all_old_contact_set(): +def get_all_old_contact_set(start_with="2024"): all_collection_list = get_all_collections() collection_to_check = [] for col in all_collection_list: - if col.startswith('20'): + if col.startswith(start_with): collection_to_check.append(col) items_set = set() for collection in collection_to_check: @@ -45,6 +47,15 @@ def get_all_old_contact_set(): return items_set +def get_all_contact_with_source_from(): + _list_to_check = get_all_old_contact_set() + _to_return = [] + for contact in _list_to_check: + if contact.serial is not None and len(contact.serial) > 0: + _to_return.append(contact) + return _to_return + + def generate_not_used_contact_list(): not_used_mail_list = get_all_not_used_mails() all_old_contact_set = get_all_old_contact_set() @@ -63,6 +74,12 @@ def generate_not_used_contact_list(): return contact_list +def upload_to_collection(): + _contacts = read_contacts(str(Path.home()) + "/Desktop/contact_list_2024-11-05.xlsx") + MONGO_STORE_MANAGER.upload_contact_list_to_collection(_contacts,"CONTACT_LIST_SERIAL_MAP") + + if __name__ == '__main__': - contact_to_save = generate_not_used_contact_list() - write_new_contacts_to_excel(valid_contacts=contact_to_save, file_name="all_old_not_used_contact") + upload_to_collection() + # contact_to_save = get_all_contact_with_source_from() + # write_new_contacts_to_excel(valid_contacts=contact_to_save, file_name="contacts_with_source")