From a689e66635b04eb918a31bb98e4002c67632fd47 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Mon, 1 Dec 2025 14:34:07 +0100 Subject: [PATCH 1/9] use FreeIPAPI.py to get ip's geo information --- utils/FreeIPAPI.py | 106 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 utils/FreeIPAPI.py diff --git a/utils/FreeIPAPI.py b/utils/FreeIPAPI.py new file mode 100644 index 0000000..ac3c917 --- /dev/null +++ b/utils/FreeIPAPI.py @@ -0,0 +1,106 @@ +import requests +import json +import sys + +def get_ip_geolocation(ip_address=None): + """ + 使用 FreeIPAPI 查询指定 IP 地址的地理位置信息。 + 如果没有提供 IP 地址,将查询请求本身的公网 IP。 + + Args: + ip_address (str, optional): 要查询的 IP 地址。默认为 None。 + + Returns: + dict: 包含 IP 信息的字典,如果请求失败则返回 None。 + """ + base_url = "https://freeipapi.com/api/json" + + if ip_address: + # 如果指定了 IP,则在 URL 中添加 IP + url = f"{base_url}/{ip_address}" + else: + # 如果未指定 IP,则查询发起请求的 IP + url = base_url + + print(f"正在查询 IP: {ip_address if ip_address else '当前公网 IP'}...") + + try: + # 发送 GET 请求,设置超时时间 + response = requests.get(url, timeout=10) + + # 检查 HTTP 状态码,如果不是 200 则抛出异常 + response.raise_for_status() + + # 解析 JSON 响应 + data = response.json() + + # 检查 API 是否返回了错误信息(FreeIPAPI 在某些情况下会返回 status: 404) + if data.get('status') == 404: + print(f"查询失败:FreeIPAPI 报告未找到该 IP 地址的信息。") + return None + + return data + + except requests.exceptions.RequestException as e: + print(f"请求失败,发生网络错误: {e}") + return None + except json.JSONDecodeError: + print("响应解析失败,可能不是有效的 JSON 格式。") + return None + +def display_ip_info(data): + """ + 格式化并打印 IP 地址信息。 + """ + if not data: + print("无法获取 IP 信息。") + return + + print("\n--- IP 地理位置信息 ---") + + # 使用 .get() 方法安全地获取数据,避免 KeyError + print(f"IP 地址: {data.get('ipAddress', 'N/A')}") + print(f"国家: {data.get('countryName', 'N/A')}") + print(f"国家代码: {data.get('countryCode', 'N/A')}") + print(f"城市: {data.get('cityName', 'N/A')}") + print(f"邮编: {data.get('zipCode', 'N/A')}") + print(f"时区: {data.get('timeZone', 'N/A')}") + # 经纬度 + latitude = data.get('latitude', 'N/A') + longitude = data.get('longitude', 'N/A') + print(f"纬度/经度: {latitude} / {longitude}") + print(f"ISP/组织: {data.get('isp', 'N/A')}") + print("------------------------") + + +def main(): + """ + 主执行函数。可以接受命令行参数作为要查询的 IP 地址。 + """ + # 检查是否有命令行参数传入 + if len(sys.argv) > 1: + # 取第一个参数作为要查询的 IP 地址 + ip_to_query = sys.argv[1] + print(f"检测到命令行参数: {ip_to_query}") + + ip_info = get_ip_geolocation(ip_to_query) + display_ip_info(ip_info) + + else: + # 1. 查询当前公网 IP (不传参数) + print("\n--- 示例 1: 查询当前公网 IP ---") + my_ip_info = get_ip_geolocation() + display_ip_info(my_ip_info) + + # 2. 查询特定 IP 地址 (例如:Google 的 DNS 服务器 8.8.8.8) + print("\n--- 示例 2: 查询特定 IP 地址 (8.8.8.8) ---") + google_dns_ip = "8.8.8.8" + google_ip_info = get_ip_geolocation(google_dns_ip) + display_ip_info(google_ip_info) + + +# 脚本入口点 +if __name__ == "__main__": + ip_info = get_ip_geolocation("80.13.246.205") + print(ip_info) + display_ip_info(ip_info) \ No newline at end of file From 0cf94d44fcafcb16e383ceeb16900d014ba906f1 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Mon, 1 Dec 2025 17:14:56 +0100 Subject: [PATCH 2/9] optimization on mail_reader_all_contacts.py --- db/mongo_manager.py | 5 +- mail/mail_reader_all_contacts.py | 435 ++++++++++++++++++++----------- models/ReserveResultPojo.py | 3 + workers/captcha_result_getter.py | 9 +- 4 files changed, 291 insertions(+), 161 deletions(-) mode change 100755 => 100644 mail/mail_reader_all_contacts.py diff --git a/db/mongo_manager.py b/db/mongo_manager.py index c89a24c..11af86c 100755 --- a/db/mongo_manager.py +++ b/db/mongo_manager.py @@ -83,7 +83,8 @@ class MongoDbManager: result_list.append(ContactPojo.from_firestore_dict(document)) return result_list - def save_links_to_validate(self, link: str, mail_address: str, model: str, _all_contact_list: list): + def save_links_to_validate(self, link: str, mail_address: str, model: str, + _all_contact_list: list, _used_ip: str = ""): collection_to_use = self.db[LINKS_TO_VALIDATE] updated_at = time.strftime("%H:%M:%S", time.localtime()) _ip_country = "FR" @@ -100,6 +101,7 @@ class MongoDbManager: u'serial': serial, u'model': model, u'ip_country': _ip_country, + u'_used_ip': _used_ip, "updated_at": updated_at }, upsert=True) @@ -109,6 +111,7 @@ class MongoDbManager: u'serial': serial, u'model': model, u'ip_country': _ip_country, + u'_used_ip': _used_ip, "updated_at": updated_at }, upsert=True) diff --git a/mail/mail_reader_all_contacts.py b/mail/mail_reader_all_contacts.py old mode 100755 new mode 100644 index f62e87f..6f6e8ab --- a/mail/mail_reader_all_contacts.py +++ b/mail/mail_reader_all_contacts.py @@ -2,11 +2,10 @@ import datetime import email import logging import re -from builtins import list from concurrent.futures import ThreadPoolExecutor from email.header import decode_header from email.message import Message -from typing import Union +from typing import Union, List from imapclient import IMAPClient from db.mongo_manager import MONGO_STORE_MANAGER @@ -15,74 +14,87 @@ from mail.mail_constants import DOMAIN_HOTMAIL, create_imap from models.ReserveResultPojo import ReserveResultPojo from models.mail_pojo import MailPojo, MailAddress -VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous' +# 定义常量 +VALIDATION_URL_SUBJECT_FR = 'Validation de votre demande de rendez-vous' VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request' -VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" -PART_VALIDATION_URL_REGEX = """client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" +VALIDATION_URL_REGEX = r"https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+" +PART_VALIDATION_URL_REGEX = r"client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+" HERMES_EMAIL = "no-reply@hermes.com" EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b' -date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014 +# 日期格式 +DATE_FORMAT = "%d-%b-%Y" + +# 邮箱列表(简化为常量) REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com" -def check_email_address(email): - # pass the regular expression - # and the string into the fullmatch() method - if (re.fullmatch(EMAIL_ADDRESS_REGEX, email)): - print("Valid Email") - return True - else: - print("Invalid Email:" + email) - return False +# 邮件处理相关函数 +def is_valid_email(email: str) -> bool: + """验证邮箱地址是否有效""" + return re.fullmatch(EMAIL_ADDRESS_REGEX, email) is not None -def find_from_mail(param): +def extract_email_from_from_address(content: str) -> str: + """从邮件地址中提取邮箱""" + match = re.search(r'[\w.+-]+@[\w-]+\.[\w.-]+', content) + return match.group(0) if match else "" + + +def find_from_mail(param) -> str: + """解析邮件地址""" from_address, encoded_algo = param[0] + + # 处理字节编码 if isinstance(from_address, bytes): from_address = from_address.decode(encoded_algo) - if not check_email_address(from_address) and len(param) == 2: + + # 如果邮箱地址无效,尝试另一种编码 + if not is_valid_email(from_address) and len(param) == 2: from_address, new_encode = param[1] if new_encode is None: new_encode = encoded_algo if isinstance(from_address, bytes): from_address = from_address.decode(new_encode) - return from_address.strip(" ").strip(">").strip("<") + + # 清理邮箱地址 return from_address.strip(" ").strip(">").strip("<") -def extract_email_from_from_address(content: str): - _match = re.search(r'[\w.+-]+@[\w-]+\.[\w.-]+', content) - return _match.group(0) +class MailReader: + """邮件读取器类""" - -class MailReader(): - def __init__(self, login, password): + def __init__(self, login: str, password: str): self.login = login self.password = password @staticmethod - def show_folders(imap) -> list: + def show_folders(imap) -> List[str]: + """获取邮箱文件夹列表""" folders = [] - isImapClient = isinstance(imap, IMAPClient) - if not isImapClient: + is_imap_client = isinstance(imap, IMAPClient) + + if not is_imap_client: + # 处理非IMAPClient对象 for i in imap.list()[1]: l = i.decode().split(' "/" ') folders.append(l[1]) - return folders else: - list = imap.list_folders() - for i in list: + # 处理IMAPClient对象 + folder_list = imap.list_folders() + for i in folder_list: name = i[-1] folders.append(name) - return folders - def read_emails(self, mails_messages: list) -> list: + return folders + + def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]: + """读取邮件""" imap = create_imap(self.login) - isImapClient = isinstance(imap, IMAPClient) - print("isImapClient is " + str(isImapClient)) - if isImapClient: - # authenticate + is_imap_client = isinstance(imap, IMAPClient) + + # 登录邮箱 + if is_imap_client: dat = imap.login(self.login, str(self.password)) print("type is {} for {}".format(dat, self.login)) else: @@ -91,224 +103,333 @@ class MailReader(): mail_list = [] print("read mails from {}".format(self.login)) - if not isImapClient: - folder_list = self.show_folders(imap) - for folder in folder_list: - print("folder is {}".format(folder)) - mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_fr, - folder=folder)) - mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_EN, - folder=folder)) - else: - folder_list = self.show_folders(imap) - for folder in folder_list: - print("folder is " + folder) - if folder == "Sent" or folder == "Drafts": - pass - else: - mail_list.extend(self._get_messages_from_folder_for_imapclient(imap, folder=folder)) - if not isImapClient: + + # 获取文件夹列表 + folder_list = self.show_folders(imap) + + # 处理每个文件夹 + for folder in folder_list: + print("folder is {}".format(folder)) + + # 跳过Sent和Drafts文件夹 + if folder in ["Sent", "Drafts"]: + continue + + if is_imap_client: + # 使用IMAPClient处理 + mail_list.extend(self._get_messages_from_folder_for_imapclient(imap, folder)) + else: + # 使用传统IMAP处理 + mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_FR, folder=folder)) + mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_EN, folder=folder)) + + # 关闭连接 + if not is_imap_client: imap.close() imap.logout() + + # 添加邮件到结果列表 mails_messages.extend(mail_list) return mail_list - def _get_messages_from_folder(self, imap, subject, folder="INBOX") -> list: + def _get_messages_from_folder(self, imap, subject: str, folder: str = "INBOX") -> List[MailPojo]: + """从指定文件夹获取邮件(传统IMAP方式)""" imap.select(folder) mail_messages = [] - typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(subject, - datetime.datetime.today().strftime( - date_format))) + + # 搜索邮件 + search_query = '(SUBJECT "{}" SINCE "{}")'.format(subject, datetime.datetime.today().strftime(DATE_FORMAT)) + typ, data = imap.search(None, search_query) + for i in data[0].split(): - # fetch the email message by ID - res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") - body = '' - for response in msg: - if isinstance(response, tuple): - # parse a bytes email into a message object - msg = email.message_from_bytes(response[1]) - # decode the email subject - subject, subject_encoded = decode_header(msg["Subject"])[0] - received_date = msg["Date"] - if isinstance(subject, bytes): - # if it's a bytes, decode to str - subject = subject.decode(subject_encoded) - # decode email sender - from_address = find_from_mail(decode_header(msg.get("From"))) - to_email = find_from_mail(decode_header(msg.get("To"))) - print("Email:", self.login) - print("From:", from_address) - print("To:", to_email) - print("Subject:", subject) - # if the email message is multipart - if msg.is_multipart(): - # iterate over email parts - for part in msg.walk(): - try: - # get the email body - payloads = part.get_payload() - if isinstance(payloads, list): - for payload in payloads: - if isinstance(payload, Message): - body = body + payload.get_payload(decode=True).decode("iso-8859-1") - # print(body) - except Exception as Error: - print(Error) - else: - body = msg.get_payload(decode=True).decode() - print(body) - if VALIDATION_URL_SUBJECT_fr in subject or VALIDATION_URL_SUBJECT_EN in subject: - mail = MailPojo(subject=subject, body=body, from_address=from_address) - if to_email is None: - mail.to_address = self.login - else: - mail.to_address = to_email - mail.mail_address = self.login - mail_messages.append(mail) + try: + # 获取邮件内容 + res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") + + # 解析邮件 + for response in msg: + if isinstance(response, tuple): + email_message = email.message_from_bytes(response[1]) + + # 解码主题 + subject, subject_encoded = decode_header(email_message["Subject"])[0] + if isinstance(subject, bytes): + subject = subject.decode(subject_encoded) + + # 解码发件人地址 + from_address = find_from_mail(decode_header(email_message.get("From"))) + + # 解码收件人地址 + to_email = find_from_mail(decode_header(email_message.get("To"))) + + print("Email:", self.login) + print("From:", from_address) + print("To:", to_email) + print("Subject:", subject) + + # 获取邮件正文 + body = self._extract_body(email_message) + + # 检查是否是预约验证邮件 + if VALIDATION_URL_SUBJECT_FR in subject or VALIDATION_URL_SUBJECT_EN in subject: + mail = MailPojo( + subject=subject, + body=body, + from_address=from_address + ) + + # 设置收件人地址 + if to_email is None: + mail.to_address = self.login + else: + mail.to_address = to_email + + mail.mail_address = self.login + mail_messages.append(mail) + except Exception as error: + print("Error processing email: {}".format(error)) + return mail_messages - def _get_messages_from_folder_for_imapclient(self, imap, folder="INBOX") -> list: + def _extract_body(self, email_message: Message) -> str: + """提取邮件正文""" + body = "" + + # 遍历邮件部分 + for part in email_message.walk(): + try: + content_type = part.get_content_type() + + if content_type == "text/html": + # 处理HTML内容 + payload = part.get_payload(decode=True) + if payload: + body += payload.decode("utf-8", errors="ignore") + elif content_type == "text/plain": + # 处理纯文本内容 + payload = part.get_payload() + if payload: + body += payload + except Exception as error: + print("Error extracting body part: {}".format(error)) + + return body + + def _get_messages_from_folder_for_imapclient(self, imap, folder: str = "INBOX") -> List[MailPojo]: + """从指定文件夹获取邮件(IMAPClient方式)""" mail_messages = [] + + # 搜索邮件 search_terms = 'SINCE "{}"'.format( - datetime.datetime.today().strftime( - date_format)) + datetime.datetime.today().strftime(DATE_FORMAT)) print("{}: search terms is {}".format(self.login, search_terms)) + imap.select_folder(folder) messages = imap.search(['SINCE', datetime.datetime.today()]) print("{}: {} messages from our best friend".format(self.login, len(messages))) + if len(messages) == 0: return mail_messages + + # 处理每封邮件 for uid, message_data in imap.fetch(messages, 'RFC822').items(): try: email_message = email.message_from_bytes(message_data[b'RFC822']) + + # 获取发件人和主题 from_address = email_message.get('FROM') subject = email_message.get('subject') - body = "" - hermes_mail_adress = "no-reply@hermes.com" - if hermes_mail_adress in from_address or "outlook.com" in from_address or "hotmail" in from_address: - for part in email_message.walk(): - print(part.get_content_type()) - if part.get_content_type() == "text/html": - body = body + part.get_payload(decode=True).decode("utf-8") - elif part.get_content_type() == "text/plain": - body = body + part.get_payload() - if VALIDATION_URL_SUBJECT_fr in subject or VALIDATION_URL_SUBJECT_EN in subject or "Votre=20demande=20de=20rendez-vous" in subject or "Votre demande de rendez-vous" in body: - mail = MailPojo(subject=subject, body=body, from_address=from_address) + + # 检查是否是Hermes邮件 + hermes_mail_address = "no-reply@hermes.com" + if (hermes_mail_address in from_address or + "outlook.com" in from_address or + "hotmail" in from_address): + + # 提取邮件正文 + body = self._extract_body_for_imapclient(email_message) + + # 检查是否是预约验证邮件 + if (VALIDATION_URL_SUBJECT_FR in subject or + VALIDATION_URL_SUBJECT_EN in subject or + "Votre=20demande=20de=20rendez-vous" in subject or + "Votre demande de rendez-vous" in body): + + mail = MailPojo( + subject=subject, + body=body, + from_address=from_address + ) mail.isImapClient = True + print("email is {}".format(self.login)) print("body is {}".format(body)) print("subject is {}".format(subject)) + + # 设置收件人地址 if len(mail.to_address) == 0: if "outlook.com" in from_address or "hotmail.com" in from_address: - # it is a transferred email + # 转发邮件 mail.to_address = extract_email_from_from_address(from_address) else: mail.to_address = self.login + mail_messages.append(mail) except Exception as error: - print(error) - print("error trying to read email_Message for {}".format(self.login)) + print("Error trying to read email_Message for {}: {}".format(self.login, error)) + return mail_messages + def _extract_body_for_imapclient(self, email_message: Message) -> str: + """提取IMAPClient邮件正文""" + body = "" -# -# Find the ReserveResultPojo object from persisted items of DB -# + for part in email_message.walk(): + content_type = part.get_content_type() + + if content_type == "text/html": + payload = part.get_payload(decode=True) + if payload: + body += payload.decode("utf-8", errors="ignore") + elif content_type == "text/plain": + payload = part.get_payload() + if payload: + body += payload + + return body + + +# 邮件处理相关函数 def find_item_by_url(url: str, successful_items) -> Union[None, ReserveResultPojo]: + """根据URL查找预约结果对象""" print("url is :" + url) parts = url.split('/') _id = parts[5] + if len(_id) == 6: for item in successful_items: if item.id == _id: return item + return None def need_to_valid_url(url: str, item: Union[ReserveResultPojo, None]) -> bool: + """判断是否需要验证URL""" print("url is :" + url) parts = url.split('/') - id = parts[5] - if len(id) == 6: + _id = parts[5] + + if len(_id) == 6: if item: if item.url_validated is not None: return not item.url_validated else: - # if url_validated is None + # 如果url_validated为None,需要验证 return True return True - else: - print("id not valid:{}".format(id)) - return False + + print("id not valid:{}".format(_id)) + return False def need_to_check_email(mail: str, successful_items) -> bool: + """判断是否需要检查邮件""" print("successful_items size is " + str(len(successful_items))) + + # 特殊处理 if mail == "saigecong1990@pissmail.com": return True - filtered_items = list(filter(lambda item: item.email == mail, successful_items)) - # has validated value - if len(filtered_items) > 0: - validated_items = list(filter( - lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True, - filtered_items)) - if len(validated_items) > 0: - return False - else: - return True - else: - return True + + # 过滤已验证的项目 + filtered_items = [item for item in successful_items if item.email == mail] + + # 检查是否有已验证的项目 + validated_items = [item for item in filtered_items + if item.url_validated is not None and item.url_validated is True] + + return len(validated_items) == 0 -def find_links_to_validate_from_mail_list(mail_list: list, logger): +def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger) -> None: + """从邮件列表中查找需要验证的链接""" if not mail_list: return - # check time before start checking emails + + # 检查时间前开始检查邮件 contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list() successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + mails_messages = [] - with ThreadPoolExecutor(max_workers=200) as executor: + + # 使用线程池处理邮件 + with ThreadPoolExecutor(max_workers=20) as executor: + futures = [] + for mail in mail_list: - # check whether we need to read mail + # 检查是否需要读取邮件 if need_to_check_email(mail.mail, successful_items): mail_reader = MailReader(mail.mail, mail.password) - executor.submit(mail_reader.read_emails, mails_messages) + future = executor.submit(mail_reader.read_emails, mails_messages) + futures.append(future) + + # 等待所有任务完成 + for future in futures: + try: + future.result() + except Exception as e: + print("Error processing mail: {}".format(e)) + + # 刷新成功的项目 _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + + # 处理邮件中的链接 for mail in mails_messages: match = re.search(VALIDATION_URL_REGEX, mail.body) if match: url = match.group(0) _item = find_item_by_url(url, _refreshed_successful_items) + if need_to_valid_url(url, _item): logger.info("need to validate url: " + url) _model = "" + _used_ip = "" if _item: _model = _item.model - MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, model=_model, - _all_contact_list=contact_to_book_list) + _used_ip = _item.current_ip + + MONGO_STORE_MANAGER.save_links_to_validate( + url, + mail.to_address, + model=_model, + _all_contact_list=contact_to_book_list, _used_ip= _used_ip) else: logger.info("do not need to click url --> {}".format(mail.mail_address)) +# 主函数 if __name__ == '__main__': - # mail_address1 = MailAddress(mail="tinagonzales685585@aol.com", password="yhihvdkrbxnksema") - # mail_list = [mail_address1] + # 读取联系人列表 contact_to_book_list = read_contacts( - # file_name="/Users/rdv/Desktop/contact_list_not_used_contacts.xlsx") - # file_name="/Users/lpan/Desktop/contact_list_not_used_contacts.xlsx") - # file_name="/Users/rdv/Desktop/contact_list_2025-10-30.xlsx") - # file_name="~/Desktop/contact_list_all.xlsx") file_name="~/Desktop/contact_list_2025-11-06.xlsx") - # file_name="/Users/rdv/Desktop/contact_list_all_studo_gmx_us.xlsx") - # file_name="/Users/rdv/Desktop/contact_list_2025-05-24.xlsx") + + # 获取目标邮箱列表 all_mail_list = MONGO_STORE_MANAGER.get_destination_emails() + + # 筛选需要检查的邮件列表 mail_list_to_check = [] for contact in contact_to_book_list: for mail in all_mail_list: if contact.mail == mail.mail: mail_list_to_check.append(mail) + + # 设置日志记录器 logger = logging.getLogger() + + # 获取已验证的链接列表 _all_links = MONGO_STORE_MANAGER.get_links_to_validate() + + # 过滤掉已处理的邮件 filter_mail = [] for mail_pojo in mail_list_to_check: _to_add = True @@ -317,6 +438,6 @@ if __name__ == '__main__': _to_add = False if _to_add: filter_mail.append(mail_pojo) - # filter_mail.append(MailAddress("saigecong1990@pissmail.com", "cvExXKOP8oY1D@")) - # filter_mail = [MailAddress("saigecong1990@pissmail.com", "cvExXKOP8oY1D@")] + # filter_mail = [MailAddress("utatapi@gmx.net", "RSAzHAFek8s")] + # 处理邮件 find_links_to_validate_from_mail_list(filter_mail, logger) diff --git a/models/ReserveResultPojo.py b/models/ReserveResultPojo.py index 43d9939..cc99e5b 100755 --- a/models/ReserveResultPojo.py +++ b/models/ReserveResultPojo.py @@ -113,6 +113,9 @@ class ReserveResultPojo: if 'validated_at' in source: validated_at = source['validated_at'] result.validated_at = validated_at + if 'current_ip' in source: + current_ip = source['current_ip'] + result.current_ip = current_ip result.id = id return result diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index a81e104..817c052 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -2,8 +2,11 @@ import json import random import re from typing import Union + +import requests + # import requests -from curl_cffi import requests +# from curl_cffi import requests from captcha.jspl_encoder_wrapper import encrpte_to_jspl from models.jsdata_le_pojo import JsDataLeTypePojo @@ -106,7 +109,7 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "5.1.8" + _tag_version = "5.1.9" _jspl = encrpte_to_jspl(js_data.to_url_encoded_json()) _raw_data = "jspl={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( _jspl, old_valid_cookie, _tag_version) @@ -177,7 +180,7 @@ class CaptchaResultGetter: # old_valid_cookie=old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie) _jspl = encrpte_to_jspl(js_le_type_data.to_url_encoded_json()) - _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.8".format( + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.9".format( _jspl, mousemove_count, click_count, scroll_count, touch_count, touch_count, touch_move, key_count, From e954055042f3df55ff86f8c856c25c7cf31e4da1 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Fri, 5 Dec 2025 17:27:15 +0100 Subject: [PATCH 3/9] can insert timestamp to mongo db --- db/mongo_manager.py | 5 +++- workers/link_validator_with_provided_list.py | 29 ++++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/db/mongo_manager.py b/db/mongo_manager.py index 11af86c..6dc8e16 100755 --- a/db/mongo_manager.py +++ b/db/mongo_manager.py @@ -168,7 +168,9 @@ class MongoDbManager: return link_list def link_validated_for_result(self, link: str, linkPojo: LinkPojo, state=True, is_duplicated=False, - is_invalid=False, segement_position=1, ua="", model=""): + is_invalid=False, segement_position=1, ua="", model="", timestamp_in_s: list = None): + if timestamp_in_s is None: + timestamp_in_s = [] print("link_validated_for_result() called with url = " + link) if is_duplicated: _id = link.split("/")[-2] @@ -194,6 +196,7 @@ class MongoDbManager: "validated_by_model": model, "serial": linkPojo.serial, "validated_by_ua": ua, + "timestamp_in_s": "-".join(str(x) for x in timestamp_in_s), "validated_by": validated_by}}, upsert=True) # remove the link from db diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index 8fb6bf6..6007ef6 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -14,7 +14,8 @@ from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo from models.result_pojo import RequestResult from proxy_manager.proxy_manager import ProxyManager -from queue_message.CookiesPublisher import CookiesPublisher, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK +from queue_message.CookiesPublisher import CookiesPublisher, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK, \ + MORNING_DATA_CACHE_2 from queue_message.appointmentrequestsender import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials from utils.AppLogging import init_logger from utils.user_agent_helper import generate_headers_from_request_message @@ -31,6 +32,7 @@ def filter_link_pojo_list_with_model(_received_dict, link_to_validate_list): _model = _received_dict["model"] print("link list size before filter = {}".format(len(link_to_validate_list))) _links_filtered_by_model = list(filter(lambda link_pojo: link_pojo.model == _model, link_to_validate_list)) + # 注意:下面这行代码在原文件中覆盖了过滤结果,这里保留原逻辑,如需真正过滤请删除下一行 _links_filtered_by_model = link_to_validate_list print("link list size after filter = {}".format(len(_links_filtered_by_model))) if len(_links_filtered_by_model) > 0: @@ -91,9 +93,31 @@ class LinkValidatorWithProvidedList(threading.Thread): print(response.text) if "Votre demande de rendez-vous est enregistrée" in _content: print(response.url) + + # --- 更新时间戳逻辑开始 --- + _timestamps_to_save = [] + if _received_dict is not None: + try: + current_timestamp = int(time.time()) + # 如果不存在 timestampInS 字段,则初始化 + if "timestampInS" not in _received_dict: + _received_dict["timestampInS"] = [] + + # 确保是列表并添加当前时间戳 + if isinstance(_received_dict["timestampInS"], list): + _received_dict["timestampInS"].append(current_timestamp) + _timestamps_to_save = _received_dict["timestampInS"] + print("Updated timestampInS with: {}".format(current_timestamp)) + except Exception as e: + print(f"Error updating timestamp: {e}") + # --- 更新时间戳逻辑结束 --- + + # 调用 DB Manager,传入 timestampInS MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, segement_position=threading.currentThread().name, - ua=_ua, model=_model) + ua=_ua, model=_model, + timestamp_in_s=_timestamps_to_save) + # set new cookies _cookies_to_set = response.headers['set-cookie'] self.cookie.load(_cookies_to_set) @@ -240,6 +264,7 @@ def validate_all_links(_contact_serial_list): # _queue_name = "REQUEST_DATA" _queue_name = MORNING_DATA_CACHE_BAK # _queue_name = MORNING_DATA_CACHE + # _queue_name = MORNING_DATA_CACHE_2 _thread_list = [] if len(_first_25_percent_links) >= 200: _segment_number = 200 From 38d50c77dc43375d91909f47f1a3cc32bfc0b1dc Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 5 Dec 2025 20:43:30 +0100 Subject: [PATCH 4/9] remove type from ReserveResultPojo --- models/ReserveResultPojo.py | 11 +---------- workers/sender.py | 2 +- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/models/ReserveResultPojo.py b/models/ReserveResultPojo.py index cc99e5b..74b29b1 100755 --- a/models/ReserveResultPojo.py +++ b/models/ReserveResultPojo.py @@ -16,7 +16,6 @@ class PublishType(Enum): @dataclass_json @dataclass class ReserveResultPojo: - type: PublishType = PublishType.ERROR phone: str = "" message: str = "" url: str = "" @@ -41,11 +40,6 @@ class ReserveResultPojo: @staticmethod def from_firestore_dict(source): - publish_type = PublishType.ERROR - if 'type' in source: - publish_type = source['type'] - if publish_type: - publish_type = PublishType[publish_type] if 'phone' in source: phone = source['phone'] else: @@ -54,8 +48,6 @@ class ReserveResultPojo: url = source['url'] else: url = "" - if 'id' in source: - id = source['id'] if '_id' in source: id = source['_id'] else: @@ -74,7 +66,7 @@ class ReserveResultPojo: else: firstName = "" - result = ReserveResultPojo(type=publish_type, phone=phone, + result = ReserveResultPojo(phone=phone, url=url, email=email, firstName=firstName, lastName=lastName) if 'accepted' in source: @@ -121,7 +113,6 @@ class ReserveResultPojo: def to_firestore_dict(self): dest = { - u'type': self.type.value, u'id': self.id, u'phone': self.phone, u'firstName': self.firstName, diff --git a/workers/sender.py b/workers/sender.py index 089ee3f..aa95c55 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -44,7 +44,7 @@ class Sender: if url == "https://rendezvousparis.hermes.com/client/welcome": return id = url.split("/")[-1] - result = ReserveResultPojo(type=status, phone=contact.phone, message=status.value, url=url, + result = ReserveResultPojo(phone=contact.phone, message=status.value, url=url, firstName=contact.first_name, lastName=contact.last_name, email=contact.mail, passport=contact.passport, ccid=contact.ccid) result.ua = contact.ua From fcc9ef1b98e7aad6fde1940cd44847417184cbb8 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 5 Dec 2025 20:52:21 +0100 Subject: [PATCH 5/9] delete unused code --- mail/mail_constants.py | 84 ++++----- parallel_request_sender.py | 87 ---------- queue_message/parallel_requestsender.py | 221 ------------------------ 3 files changed, 33 insertions(+), 359 deletions(-) delete mode 100644 parallel_request_sender.py delete mode 100644 queue_message/parallel_requestsender.py diff --git a/mail/mail_constants.py b/mail/mail_constants.py index bc5441a..b1156d2 100755 --- a/mail/mail_constants.py +++ b/mail/mail_constants.py @@ -2,6 +2,7 @@ import imaplib from imapclient import IMAPClient +# 邮件域名常量 DOMAIN_YAHOO = "yahoo.com" DOMAIN_SINA = "sina.com" DOMAIN_HOTMAIL = "hotmail.com" @@ -10,9 +11,9 @@ DOMAIN_163 = "163.com" DOMAIN_RAMBLER_RU = "rambler.ru" DOMAIN_ALICE_IT = "alice.it" DOMAIN_MARS_DTI_NE_JP = "mars.dti.ne.jp" -DOMAN_BTVM_NE_JP = "btvm.ne.jp" -DOMAN_AURORA_DTI_NE_JP = "aurora.dti.ne.jp" -DOMAN_GMAIL = "gmail.com" +DOMAIN_BTVM_NE_JP = "btvm.ne.jp" +DOMAIN_AURORA_DTI_NE_JP = "aurora.dti.ne.jp" +DOMAIN_GMAIL = "gmail.com" DOMAIN_GMX = "gmx.com" DOMAIN_GMX_NET = "gmx.net" DOMAIN_GMX_AT = "gmx.at" @@ -27,6 +28,7 @@ DOMAIN_NAVER = "naver.com" DOMAIN_INBOX_LV = "inbox.lv" DOMAIN_GMX_DE = "gmx.de" +# 垃圾邮件域名 DOMAIN_PISS_MAIL = "pissmail.com" DOMAIN_INCEL_EMAIL = "incel.email" DOMAIN_SHITPOSTING_EXPERT = "shitposting.expert" @@ -38,6 +40,7 @@ DOMAIN_WEB_DE = "web.de" DOMAIN_OUTLOOK_COM = "outlook.com" DOMAIN_FIREMAIL_DE = "firemail.de" +# IMAP服务器地址常量 AOL_IMAP_SERVER = "imap.aol.com" IMAP_SERVER_163 = "imap.163.com" IMAP_SERVER_SINA = "imap.sina.com" @@ -49,18 +52,17 @@ ALICE_IMAP_SERVER = "in.alice.it" TIME_IT_SERVER = "imap.tim.it" MARS_DTI_NE_JP_SERVER = "imap.cm.dream.jp" NAVER_SERVER = "imap.naver.com" -BTVM_NE_JP = "imap.btvm.ne.jp" -SEREVER_GMAIL = "imap.gmail.com" -SERVER_IMAGE_ONET = "imap.poczta.onet.pl" -SERVER_GMX = "imap.gmx.com" -SERVER_GMX_NET = "imap.gmx.net" -SERVER_GMX_AT = "imap.gmx.at" -SERVER_FIREMAIL_DE = "imap.firemail.de" -SERVER_PISS_MAIL = "mail.pissmail.com" -INBOX_LV = "mail.inbox.lv" -SERVER_WEB_DE = "imap.web.de" -IMAP_SERVER_DOMAIN_GAZETA_PL = "imap.gazeta.pl" - +BTVM_NE_JP_SERVER = "imap.btvm.ne.jp" +GMAIL_IMAP_SERVER = "imap.gmail.com" +ONET_IMAP_SERVER = "imap.poczta.onet.pl" +GMX_IMAP_SERVER = "imap.gmx.com" +GMX_NET_IMAP_SERVER = "imap.gmx.net" +GMX_AT_IMAP_SERVER = "imap.gmx.at" +FIREMAIL_DE_IMAP_SERVER = "imap.firemail.de" +PISS_MAIL_IMAP_SERVER = "mail.pissmail.com" +INBOX_LV_IMAP_SERVER = "mail.inbox.lv" +WEB_DE_IMAP_SERVER = "imap.web.de" +GAZETA_PL_IMAP_SERVER = "imap.gazeta.pl" def show_folders(imap) -> list: folders = [] @@ -82,69 +84,49 @@ def show_folders(imap) -> list: def create_imap(login: str): - # create an IMAP4 class with SSL + # 创建一个IMAP4类实例 if DOMAIN_163 in login: imap = IMAPClient(IMAP_SERVER_163, use_uid=True) elif DOMAIN_YAHOO in login: - # imap = imaplib.IMAP4_SSL(YAHOO_IMAP_SERVER) imap = IMAPClient(YAHOO_IMAP_SERVER, use_uid=True) elif DOMAIN_FIREMAIL_DE in login: - # imap = imaplib.IMAP4_SSL(SERVER_FIREMAIL_DE) - imap = IMAPClient(SERVER_FIREMAIL_DE, use_uid=True) + imap = IMAPClient(FIREMAIL_DE_IMAP_SERVER, use_uid=True) elif DOMAIN_GMX in login or DOMAIN_GMX_FR in login or DOMAIN_GMX_US in login or DOMAIN_GMX_CH in login or DOMAIN_GMX_PT in login or DOMAIN_GMX_SG in login: - # imap = imaplib.IMAP4_SSL(SERVER_GMX) - imap = IMAPClient(SERVER_GMX, use_uid=True) + imap = IMAPClient(GMX_IMAP_SERVER, use_uid=True) elif DOMAIN_SINA in login: - # imap = imaplib.IMAP4_SSL(IMAP_SERVER_SINA) imap = IMAPClient(IMAP_SERVER_SINA, use_uid=True) elif DOMAIN_HOTMAIL in login or DOMAIN_OUTLOOK_COM in login: - # imap = imaplib.IMAP4_SSL(HOTMAIL_IMAP_SERVER) imap = IMAPClient(HOTMAIL_IMAP_SERVER, use_uid=True) elif DOMAIN_RAMBLER_RU in login: - # imap = imaplib.IMAP4_SSL(RAMBLER_IMAP_SERVER) imap = IMAPClient(RAMBLER_IMAP_SERVER, use_uid=True) - elif DOMAN_BTVM_NE_JP in login: - # imap = imaplib.IMAP4_SSL(BTVM_NE_JP) - imap = IMAPClient(BTVM_NE_JP, use_uid=True) - elif DOMAN_GMAIL in login: - # imap = imaplib.IMAP4_SSL(SEREVER_GMAIL, port=993) - imap = IMAPClient(SEREVER_GMAIL, use_uid=True) + elif DOMAIN_BTVM_NE_JP in login: + imap = IMAPClient(BTVM_NE_JP_SERVER, use_uid=True) + elif DOMAIN_GMAIL in login: + imap = IMAPClient(GMAIL_IMAP_SERVER, use_uid=True) elif DOMAIN_ONET in login: - imap = IMAPClient(SERVER_IMAGE_ONET, use_uid=True) + imap = IMAPClient(ONET_IMAP_SERVER, use_uid=True) elif DOMAIN_TIM_IT in login: - # imap = imaplib.IMAP4(TIME_IT_SERVER) imap = IMAPClient(TIME_IT_SERVER, use_uid=True) elif DOMAIN_ALICE_IT in login: - # imap = imaplib.IMAP4(ALICE_IMAP_SERVER, port=143) imap = IMAPClient(ALICE_IMAP_SERVER, use_uid=True) elif DOMAIN_MARS_DTI_NE_JP in login: - # imap = imaplib.IMAP4(MARS_DTI_NE_JP_SERVER, port=143) imap = IMAPClient(MARS_DTI_NE_JP_SERVER, use_uid=True) - elif DOMAN_AURORA_DTI_NE_JP in login: - # imap = imaplib.IMAP4(MARS_DTI_NE_JP_SERVER, port=143) + elif DOMAIN_AURORA_DTI_NE_JP in login: imap = IMAPClient(MARS_DTI_NE_JP_SERVER, use_uid=True) elif DOMAIN_NAVER in login: - # imap = imaplib.IMAP4_SSL(NAVER_SERVER, port=993) imap = IMAPClient(NAVER_SERVER, use_uid=True) elif DOMAIN_GMX_DE in login or DOMAIN_GMX_NET in login: - # imap = imaplib.IMAP4_SSL(SERVER_GMX_NET, port=993) - imap = IMAPClient(SERVER_GMX_NET, use_uid=True) + imap = IMAPClient(GMX_NET_IMAP_SERVER, use_uid=True) elif DOMAIN_GMX_AT in login: - # imap = imaplib.IMAP4_SSL(SERVER_GMX_AT, port=993) - imap = IMAPClient(SERVER_GMX_AT, use_uid=True) + imap = IMAPClient(GMX_AT_IMAP_SERVER, use_uid=True) elif DOMAIN_GAZETA_PL in login: - # imap = imaplib.IMAP4_SSL(IMAP_SERVER_DOMAIN_GAZETA_PL, port=993) - imap = IMAPClient(IMAP_SERVER_DOMAIN_GAZETA_PL, use_uid=True) + imap = IMAPClient(GAZETA_PL_IMAP_SERVER, use_uid=True) elif DOMAIN_INBOX_LV in login: - # imap = imaplib.IMAP4_SSL(INBOX_LV, port=993) - imap = IMAPClient(INBOX_LV, use_uid=True) + imap = IMAPClient(INBOX_LV_IMAP_SERVER, use_uid=True) elif DOMAIN_WEB_DE in login: - # imap = imaplib.IMAP4_SSL(SERVER_WEB_DE, port=993) - imap = IMAPClient(SERVER_WEB_DE, use_uid=True) + imap = IMAPClient(WEB_DE_IMAP_SERVER, use_uid=True) elif DOMAIN_PISS_MAIL in login or DOMAIN_CHILD_PIZZA in login or DOMAIN_DMC_CHAT in login or DOMAIN_GENOCIDE_FUN in login or DOMAIN_HATESJE_WS in login or DOMAIN_INCEL_EMAIL in login or DOMAIN_SHITPOSTING_EXPERT in login: - # imap = imaplib.IMAP4_SSL(SERVER_PISS_MAIL, port=993) - imap = IMAPClient(SERVER_PISS_MAIL, use_uid=True) + imap = IMAPClient(PISS_MAIL_IMAP_SERVER, use_uid=True) else: - # imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER) imap = IMAPClient(AOL_IMAP_SERVER, use_uid=True) - return imap + return imap \ No newline at end of file diff --git a/parallel_request_sender.py b/parallel_request_sender.py deleted file mode 100644 index cf6f45c..0000000 --- a/parallel_request_sender.py +++ /dev/null @@ -1,87 +0,0 @@ -import datetime -import logging -import sys -from threading import Thread - -from db.mongo_manager import MONGO_STORE_MANAGER -from excel_reader import read_contacts -from models.contact_pojo import ContactPojo -from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE -from queue_message.appointmentrequestsender import AppointmentRequestSender -from queue_message.parallel_requestsender import ParallelRequestSender -from utiles import is_time_between -from utils.AppLogging import init_logger -from workers.proxies_constants import MOBILE_PROXY_LIST_FR - -IPFIY = 'http://api.ipify.org' -NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app" - - -def is_already_sent(contact: ContactPojo) -> bool: - already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - for required_contact in already_sent_contacts: - if contact.mail == required_contact.email: - return True - return False - - -def filter_contacts(_contact_list: list) -> list: - already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() - _contact_list_to_book = [] - for contact in _contact_list: - _to_add = True - for booked in already_sent_contacts: - if contact.mail == booked.email: - _to_add = False - # 如果已经收到链接了,就不要再请求 - for link_to_validate in _link_to_validate_list: - if contact.mail == link_to_validate.email: - logger.info("{}: link already received".format(contact.mail)) - _to_add = False - if _to_add: - _contact_list_to_book.append(contact) - - return _contact_list_to_book - - -def is_open(): - return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) - - -count = 0 -init_logger() -logger = logging.getLogger() - -logger.addHandler(logging.StreamHandler(stream=sys.stdout)) - - -def send_appointment_request(message_queue_name, _contact_list): - global count - count = count + 1 - for _contact in _contact_list: - logger.info(_contact) - _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) - _cookiesPublisher.set_up_connection() - receiver = ParallelRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, - queue_name=message_queue_name, just_send=True, - cookiesPublisher=_cookiesPublisher, logger=logger) - print("count is " + str(count)) - receiver.run() - - -if __name__ == '__main__': - contacts_file_path = '~/Desktop/31_03_to_test.xlsx' - _contact_list = read_contacts(contacts_file_path)[0:20] - _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 1 - logger.info("{} contacts to book".format(len(_contact_list_to_book))) - last_thread = None - for i in range(0, _segment_number): - logger.info("segment is {}".format(i)) - _step = int(len(_contact_list_to_book) / _segment_number) - _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] - _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) - last_thread = _thread1 - _thread1.start() - last_thread.join() diff --git a/queue_message/parallel_requestsender.py b/queue_message/parallel_requestsender.py deleted file mode 100644 index 02a3fc8..0000000 --- a/queue_message/parallel_requestsender.py +++ /dev/null @@ -1,221 +0,0 @@ -import datetime -import json -import random -import threading -import time -from concurrent.futures.thread import ThreadPoolExecutor - -import pika - -from db.mongo_manager import MONGO_STORE_MANAGER -from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list -from models.ReserveResultPojo import ReserveResultPojo -from models.contact_pojo import ContactPojo -from models.jsdata_le_pojo import JsDataLeTypePojo -from models.jsdata_pojo import JsDataPojo -from models.result_pojo import RequestResult -from queue_message.CookiesPublisher import CookiesPublisher -from queue_message.appointmentrequestsender import filter_contacts, is_open -from utiles import is_time_between -from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER -from workers.sender import Sender - -QUEUE_HOST = "appointment.lpaconsulting.fr" -REQUEST_DATA_QUEUE = 'REQUEST_DATA' -credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') - - -def is_already_sent(contact: ContactPojo) -> bool: - already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() - for required_contact in already_sent_contacts: - if contact.mail == required_contact.email: - return True - return False - - -class ParallelRequestSender(threading.Thread): - def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher, - just_send=False, - queue_name=REQUEST_DATA_QUEUE): - super().__init__() - self.connection = None - self.just_send = just_send - self.logger = logger - self.already_tried_contact_list = [] - self.cookiesPublisher = cookiesPublisher - self.channel = None - self.valid_csrf = None - self.list_to_retrieve_mails = sub_contact_list - self.contact_list = sub_contact_list - self.queue_name = queue_name - self.proxy_to_use_list = proxy_to_use_list - self.already_read_emails = False - - def set_up_connection(self): - self.connection = pika.BlockingConnection( - pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) - self.channel = self.connection.channel() - - def listen_to_queue(self, callback): - self.logger.info("listen to queue {}".format(self.queue_name)) - self.channel.basic_qos(prefetch_count=1) - self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback) - self.channel.start_consuming() - - def send_request(self, _received_cookies, _received_dict, js_data: JsDataPojo, logger, - _contact) -> RequestResult: - _proxy_to_use = self.generate_proxy() - logger.info("send_request for contact: {}, cookies: {}".format(_contact.mail, _received_cookies)) - logger.info("proxy to use is {}".format(_proxy_to_use)) - sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, - proxy_to_use=_proxy_to_use, logger=logger) - # remove already sent contacts - if is_open(): - captchaResultGetter = CaptchaResultGetter() - _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, - old_valid_cookie=_received_cookies) - # self.contact_list = filter_contacts(self.contact_list) - logger.info(_contact.mail) - valid_csrf = captchaResultGetter.get_csrf( - proxy_to_use=_proxy_to_use, js_data=js_data, - cookie=_new_cookies) - if isinstance(valid_csrf, str): - if _new_cookies is not None: - logger.info("new cookie is " + _new_cookies) - # m_s_c = f.scroll - m_s_c = random.randint(0, 3) - m_c_c = random.randint(3, 5) # click count - m_m_c = random.randint(3, 5) # move count - m_cm_r = m_c_c / m_m_c - m_ms_r = random.randint(-1, 1) - - js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], - hc=_received_dict['hc'], - ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], - br_ow=_received_dict['br_ow'], - ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], - pr=_received_dict['pr'], - plg=_received_dict['plg'], br_h=_received_dict['br_h'], - br_w=_received_dict['br_w'], - plu=_received_dict['plu'], vnd=_received_dict['vnd'], - dvm=_received_dict['dvm'], - ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], - rs_h=_received_dict['rs_h'], - rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], - m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, - m_cm_r=m_cm_r, m_ms_r=m_ms_r, emd=_received_dict['emd']) - time.sleep(random.randint(1, 4)) - _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, - js_le_type_data=js_le_data, - old_valid_cookie=_new_cookies) - if _new_le_cookies is not None: - # self.logger.info("new le type cookie is " + _new_le_cookies) - sender.cookie_str = _new_le_cookies - time.sleep(random.randint(1, 3)) - self.already_tried_contact_list.append(_contact) - can_continue = sender.send_request(HERMES_REGISTER, js_data, _contact, csrf=valid_csrf) - if can_continue == RequestResult.SUCCESS: - # 让服务器读取成功的约会 - try: - self.logger.info("try to remove success contact from list to retrieve mails") - self.list_to_retrieve_mails.remove(_contact) - except Exception as e: - self.logger.info( - "exception while remove success contact from list to retrieve mails") - print(e) - else: - can_continue = RequestResult.COOKIES_ERROR - else: - can_continue = RequestResult.COOKIES_ERROR - if can_continue == RequestResult.BLOCKED: - self.logger.info("cannot continue, we are blocked " + str(self.valid_csrf)) - elif can_continue == RequestResult.PROXY_ERROR: - self.logger.info("PROXY_ERROR, will not reset valid_csrf") - elif can_continue == RequestResult.COOKIES_ERROR: - self.logger.info("COOKIES_ERROR, will not reset valid_csrf") - else: - self.logger.info("can continue, will reset valid_csrf") - self.valid_csrf = None - return can_continue - else: - return valid_csrf - # return RequestResult.CTRF_ERROR - - def getChTypeJsDataFromDict(self, _received_dict) -> JsDataPojo: - return JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], - ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'], - ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'], - plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'], - plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'], - ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], - rs_h=_received_dict['rs_h'], - rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], emd=_received_dict['emd']) - - def generate_proxy(self): - _port = random.randint(40001, 49999) - _chosen_proxy = random.choice(self.proxy_to_use_list) - self.logger.info("generated port is {}".format(_port)) - _proxy_to_use = {} - _proxy_to_use["http"] = _chosen_proxy["http"].format(_port) - _proxy_to_use["https"] = _chosen_proxy["https"].format(_port) - return _proxy_to_use - - def on_message(self, ch, method, properties, body): - _message_count = self.cookiesPublisher.message_count() - self.logger.info("message count in queue is {}".format(_message_count)) - # prepare the contact list - if self.just_send: - self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list) - else: - self.contact_list = filter_contacts(self.contact_list) - # remove already booked contacts - random.shuffle(self.contact_list) - _received_object = body.decode("UTF-8") - self.logger.info(f" [x] Received {_received_object}") - step = 5 - _received_dict = json.loads(_received_object) - js_data = self.getChTypeJsDataFromDict(_received_dict) - _received_cookies = _received_dict["cookiesStr"] - if len(self.contact_list) > step: - _sub_list = self.contact_list[0:step] - result = None - for con in _sub_list: - with ThreadPoolExecutor(max_workers=step) as executor: - result = executor.submit(self.send_request, _received_cookies, _received_dict, js_data, self.logger, - con) - self.logger.info("result is: " + str(result.result())) - if result.result() == RequestResult.SUCCESS: - self.logger.info("Success for {}, with cookies{}".format(con.mail, _received_cookies)) - if result.result() == RequestResult.BLOCKED or result.result() == RequestResult.CTRF_ERROR: - ch.basic_ack(delivery_tag=method.delivery_tag) - - else: - ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) - else: - self.retrieve_invalidate_urls() - self.logger.info("empty list") - time.sleep(120) - self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) - ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) - - def run(self): - self.logger.info(threading.currentThread().name + " starts") - self.set_up_connection() - self.listen_to_queue(self.on_message) - self.channel.start_consuming() - - def retrieve_invalidate_urls(self): - if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: - self.logger.info("will retrieve validate urls") - time.sleep(30) - _mail_list = MONGO_STORE_MANAGER.get_destination_emails() - _mail_list_filtered = [] - for mail in _mail_list: - for _contact in self.list_to_retrieve_mails: - if _contact.mail == mail.mail: - _mail_list_filtered.append(mail) - self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) - find_links_to_validate_from_mail_list(_mail_list_filtered) - self.already_read_emails = True - else: - self.logger.info("already read emails, will not retrieve validate urls") From 727cee69bbf84c4759c83cb5a7afebfc70426eef Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 5 Dec 2025 22:48:38 +0100 Subject: [PATCH 6/9] add timestamp prefix with 'ap:' while send appointment request --- models/ReserveResultPojo.py | 2 ++ queue_message/appointmentrequestsender.py | 1 - workers/de_cookie_generator.py | 17 ----------------- workers/sender.py | 15 +++++++++++++++ 4 files changed, 17 insertions(+), 18 deletions(-) delete mode 100644 workers/de_cookie_generator.py diff --git a/models/ReserveResultPojo.py b/models/ReserveResultPojo.py index 74b29b1..ca0166f 100755 --- a/models/ReserveResultPojo.py +++ b/models/ReserveResultPojo.py @@ -37,6 +37,7 @@ class ReserveResultPojo: proxy: str = None ua: str = None current_ip: str = "" + timestampInS: list = None @staticmethod def from_firestore_dict(source): @@ -129,6 +130,7 @@ class ReserveResultPojo: u'url_validated': self.url_validated, u'proxy': self.proxy, u'current_ip': self.current_ip, + u'timestamp_in_s': "-".join(str(x) for x in self.timestampInS), u'ua': self.ua, } diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index f3ba8c1..7fa8232 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -9,7 +9,6 @@ from typing import Optional import pika from db.mongo_manager import MONGO_STORE_MANAGER -from mail.lan_mail_helper import check_mail, check_all_need_to_check_emails from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list from models.ReserveResultPojo import ReserveResultPojo from models.contact_pojo import ContactPojo diff --git a/workers/de_cookie_generator.py b/workers/de_cookie_generator.py deleted file mode 100644 index 5a8e563..0000000 --- a/workers/de_cookie_generator.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging -import sys - -from proxy_manager.proxy_manager import ProxyManager -from queue_message.CookiesPublisher import REQUEST_DATA_QUEUE_DE, CookiesPublisher, MORNING_DATA_CACHE -from utils.AppLogging import init_logger -from workers.cookie_generator import CookiesGenerator - -if __name__ == '__main__': - init_logger() - logger = logging.getLogger() - logger.addHandler(logging.StreamHandler(stream=sys.stdout)) - cookiesPublisher = CookiesPublisher(queue_name=MORNING_DATA_CACHE) - cookiesPublisher.set_up_connection() - cookieGenerator = CookiesGenerator(proxy_manager=ProxyManager(logger=logger), cookiesPublisher=cookiesPublisher, - logger=logger) - cookieGenerator.run() diff --git a/workers/sender.py b/workers/sender.py index aa95c55..5c7223e 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -64,6 +64,9 @@ class Sender: result.store_type = store_type result.model = model result.created_at = time.strftime("%H:%M:%S", time.localtime()) + # Add timestampInS to the result object if it exists in received_dict + if 'timestampInS' in self.received_dict: + result.timestampInS = self.received_dict['timestampInS'] collection_name = str(datetime.date.today()) MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result) @@ -100,6 +103,18 @@ class Sender: self.logger.info("will publish to queue {}".format(new_coolies_str)) # upload the cookie to queue self.received_dict['cookiesStr'] = new_coolies_str + # Add timestampInS field with current timestamp in seconds as a list + current_timestamp = "ap:" + str(int(time.time())) + if 'timestampInS' in self.received_dict: + # If timestampInS already exists, ensure it's a list and append new timestamp + if isinstance(self.received_dict['timestampInS'], list): + self.received_dict['timestampInS'].append(current_timestamp) + else: + # Convert to list if it's not already a list + self.received_dict['timestampInS'] = [self.received_dict['timestampInS'], current_timestamp] + else: + # Create new list with the timestamp + self.received_dict['timestampInS'] = [current_timestamp] self.logger.info("body in json:{}".format(json.dumps(self.received_dict))) if self.cookiesPublisher is not None: self.cookiesPublisher.publish_body(json.dumps(self.received_dict)) From e0b1b2a4f4a02ac44e1f6fc267eaf3ba99647f93 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Sat, 6 Dec 2025 11:06:42 +0100 Subject: [PATCH 7/9] correction on uploading timestamp while sending appointment request --- workers/sender.py | 29 +++++------------------------ 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/workers/sender.py b/workers/sender.py index 5c7223e..69bdddb 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -93,14 +93,14 @@ class Sender: contact.ua = js_data.ua contact.current_ip = get_address_ip(proxy_to_use=self.proxy_to_use) _appointment_url = _redirect_url - self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=_appointment_url, - store_type=selected_store, model=model) + # self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=_appointment_url, + # store_type=selected_store, model=model) self.cookie.load(_cookies_to_set) new_cookies = {k: v.value for k, v in self.cookie.items()} new_coolies_str = "" for key in new_cookies: new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - self.logger.info("will publish to queue {}".format(new_coolies_str)) + self.logger.info("will publish to queue") # upload the cookie to queue self.received_dict['cookiesStr'] = new_coolies_str # Add timestampInS field with current timestamp in seconds as a list @@ -115,6 +115,8 @@ class Sender: else: # Create new list with the timestamp self.received_dict['timestampInS'] = [current_timestamp] + self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=_appointment_url, + store_type=selected_store, model=model) self.logger.info("body in json:{}".format(json.dumps(self.received_dict))) if self.cookiesPublisher is not None: self.cookiesPublisher.publish_body(json.dumps(self.received_dict)) @@ -211,27 +213,6 @@ class Sender: self.logger.info(response.text) self.apply_redirect(response=response, old_headers=headers, contact=contact, js_data=js_data, selected_store=_selected_store, model=model) - # self.logger.info("{}:{}".format(contact.mail, response.url)) - # contact.ua = js_data.ua - # contact.current_ip = get_address_ip(proxy_to_use=proxy_to_use) - # _appointment_url = response.url - # self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=_appointment_url, - # store_type=_selected_store) - # cookies_to_set = response.headers['set-cookie'] - # self.cookie.load(cookies_to_set) - # new_cookies = {k: v.value for k, v in self.cookie.items()} - # new_coolies_str = "" - # for key in new_cookies: - # new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - # self.logger.info("will publish to queue {}".format(new_coolies_str)) - # # upload the cookie to queue - # self.received_dict['cookiesStr'] = new_coolies_str - # self.logger.info("body in json:{}".format(json.dumps(self.received_dict))) - # if self.cookiesPublisher is not None: - # self.cookiesPublisher.publish_body(json.dumps(self.received_dict)) - # # self.cookie_str = new_coolies_str - # self.logger.info("successful request, mail:{}".format(contact.mail)) - # self.send_jsdata_request(js_data, _appointment_url, new_coolies_str) return RequestResult.SUCCESS else: print(response.text) From 539042881e2285d94458fee64587a3bc89c40e18 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Sat, 6 Dec 2025 11:08:36 +0100 Subject: [PATCH 8/9] add some comments --- workers/link_validator_with_provided_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index 6007ef6..dd7dfe5 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -32,7 +32,7 @@ def filter_link_pojo_list_with_model(_received_dict, link_to_validate_list): _model = _received_dict["model"] print("link list size before filter = {}".format(len(link_to_validate_list))) _links_filtered_by_model = list(filter(lambda link_pojo: link_pojo.model == _model, link_to_validate_list)) - # 注意:下面这行代码在原文件中覆盖了过滤结果,这里保留原逻辑,如需真正过滤请删除下一行 + # 注意:下面这行代码在原文件中覆盖了过滤结果,这里保留原逻辑,如需真正过滤请删除下一行,经过测试,点链接的时候不需要过滤MODEL _links_filtered_by_model = link_to_validate_list print("link list size after filter = {}".format(len(_links_filtered_by_model))) if len(_links_filtered_by_model) > 0: From 9085a3c1c48e5e717f9d11dcc8f078d2d3faf863 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Tue, 9 Dec 2025 10:30:43 +0100 Subject: [PATCH 9/9] use 100 threads to read mails --- mail/mail_reader_all_contacts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mail/mail_reader_all_contacts.py b/mail/mail_reader_all_contacts.py index 6f6e8ab..5708a17 100644 --- a/mail/mail_reader_all_contacts.py +++ b/mail/mail_reader_all_contacts.py @@ -363,7 +363,7 @@ def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger) mails_messages = [] # 使用线程池处理邮件 - with ThreadPoolExecutor(max_workers=20) as executor: + with ThreadPoolExecutor(max_workers=100) as executor: futures = [] for mail in mail_list: