improvement while reading mails

This commit is contained in:
2026-04-24 18:20:41 +02:00
parent 64e47e05e7
commit 3a3a36082b
6 changed files with 532 additions and 277 deletions
+26
View File
@@ -2,6 +2,7 @@ import datetime
import logging import logging
import time import time
import os import os
from typing import Optional
from pymongo import MongoClient from pymongo import MongoClient
@@ -22,6 +23,7 @@ DESTINATION_EMAIL_LIST = "DESTINATION_EMAIL_LIST"
LINKS_TO_VALIDATE = "LINKS_TO_VALIDATE" LINKS_TO_VALIDATE = "LINKS_TO_VALIDATE"
INVALID_EMAIL_LIST = "INVALID_EMAIL_LIST" INVALID_EMAIL_LIST = "INVALID_EMAIL_LIST"
CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP" CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP"
MAIL_READ_LOG = "MAIL_READ_LOG"  # Collection recording when each mailbox was last read
class MongoDbManager: class MongoDbManager:
@@ -264,5 +266,29 @@ class MongoDbManager:
collection_to_use = self.db[LINKS_TO_VALIDATE] collection_to_use = self.db[LINKS_TO_VALIDATE]
collection_to_use.delete_one({"_id": linkPojo.email}) collection_to_use.delete_one({"_id": linkPojo.email})
# ── Mail read-time tracking ────────────────────────────────────
def get_last_mail_read_time(self, mail: str) -> Optional[datetime.datetime]:
    """Return the ``last_read_at`` value stored for ``mail``, or ``None``.

    The lookup is done by ``_id`` in the ``MAIL_READ_LOG`` collection.
    ``None`` is returned when no document exists, when the document has no
    ``last_read_at`` field, or when the lookup raises (the error is logged
    as a warning and swallowed).
    """
    try:
        record = self.db[MAIL_READ_LOG].find_one({"_id": mail})
        if not record or "last_read_at" not in record:
            return None
        return record["last_read_at"]
    except Exception as err:
        # Best effort: a storage failure is treated like "never read".
        self.logger.warning("get_last_mail_read_time error: %s", err)
        return None
def update_mail_read_time(self, mail: str) -> None:
    """Record the current UTC time as the last read time of ``mail``.

    The document keyed by ``mail`` in the ``MAIL_READ_LOG`` collection is
    replaced (or created, thanks to ``upsert=True``) with a fresh
    ``last_read_at`` timestamp. Any storage error is logged as a warning and
    swallowed, so a failed write never interrupts mail reading.
    """
    # datetime.utcnow() is deprecated since Python 3.12 and yields a naive
    # value; an explicit UTC-aware timestamp denotes the same instant.
    # NOTE(review): PyMongo is expected to store aware datetimes as UTC, so the
    # persisted value should be unchanged; confirm against the driver docs.
    read_at = datetime.datetime.now(datetime.timezone.utc)
    try:
        self.db[MAIL_READ_LOG].replace_one(
            {"_id": mail},
            {"_id": mail, "last_read_at": read_at},
            upsert=True,
        )
    except Exception as err:
        self.logger.warning("update_mail_read_time error: %s", err)
MONGO_STORE_MANAGER = MongoDbManager() MONGO_STORE_MANAGER = MongoDbManager()
+66
View File
@@ -0,0 +1,66 @@
"""
mail/
=====
Package de lecture IMAP pour le bot appointment_request.
Architecture (du plus bas au plus haut niveau) :
imap_proxy_reader — bibliothèque de bas niveau, sans dépendances internes
ProxyConfig dataclass de configuration proxy (SOCKS5/SOCKS4/HTTP)
ProxyIMAPClient IMAPClient passant par un proxy
get_imap_server(login) résolution domaine → serveur IMAP
extract_body(msg) extraction HTML/texte d'un email
send_imap_id(imap) spoofing fingerprint client (RFC 2971)
VALIDATION_URL_* constantes Hermes (source de vérité)
IMAP_SERVER_MAP table domaine → serveur
mail_constants IMAPClient avec fingerprint + fabrique create_imap()
FingerprintIMAPClient IMAPClient auto-ID après login
create_imap(login) fabrique → FingerprintIMAPClient sur le bon serveur
show_folders(imap) liste des dossiers (IMAPClient ou imaplib)
mail_reader_all_contacts logique métier de haut niveau
MailReader lit les emails d'un compte (direct ou proxy)
find_links_to_validate_from_mail_list() point d'entrée principal
"""
from mail.imap_proxy_reader import (
ProxyConfig,
ProxyIMAPClient,
get_imap_server,
extract_body,
send_imap_id,
VALIDATION_URL_SUBJECT_FR,
VALIDATION_URL_SUBJECT_EN,
VALIDATION_URL_REGEX,
IMAP_SERVER_MAP,
)
from mail.mail_constants import (
FingerprintIMAPClient,
create_imap,
show_folders,
)
from mail.mail_reader_all_contacts import (
MailReader,
find_links_to_validate_from_mail_list,
)
__all__ = [
# imap_proxy_reader
"ProxyConfig",
"ProxyIMAPClient",
"get_imap_server",
"extract_body",
"send_imap_id",
"VALIDATION_URL_SUBJECT_FR",
"VALIDATION_URL_SUBJECT_EN",
"VALIDATION_URL_REGEX",
"IMAP_SERVER_MAP",
# mail_constants
"FingerprintIMAPClient",
"create_imap",
"show_folders",
# mail_reader_all_contacts
"MailReader",
"find_links_to_validate_from_mail_list",
]
+121 -2
View File
@@ -26,6 +26,8 @@ from dataclasses import dataclass, field
from email.message import Message from email.message import Message
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import random
import socks import socks
from dotenv import load_dotenv from dotenv import load_dotenv
from imapclient import IMAPClient from imapclient import IMAPClient
@@ -40,7 +42,7 @@ VALIDATION_URL_SUBJECT_FR = "Validation de votre demande de rendez-vous"
VALIDATION_URL_SUBJECT_EN = "Please confirm your appointment request" VALIDATION_URL_SUBJECT_EN = "Please confirm your appointment request"
VALIDATION_URL_REGEX = ( VALIDATION_URL_REGEX = (
r"https:\/\/rendezvousparis\.hermes\.com" r"https:\/\/rendezvousparis\.hermes\.com"
r"\/client\/register\/[A-Z0-9]+\/validate\.code=[A-Z0-9]+" r"\/client\/register\/[A-Z0-9]+\/validate[?.]code=[A-Z0-9]+"
) )
DATE_FORMAT = "%d-%b-%Y" DATE_FORMAT = "%d-%b-%Y"
@@ -198,11 +200,122 @@ class ProxyIMAP4_TLS(imaplib.IMAP4):
def shutdown(self) -> None: def shutdown(self) -> None:
imaplib.IMAP4.shutdown(self) imaplib.IMAP4.shutdown(self)
def id(self, parameters: dict) -> tuple:
    """Send the IMAP ID command (RFC 2971).

    Args:
        parameters: Field/value pairs to advertise, for example
            ``{"name": "Thunderbird", "version": "115.0"}``. Keys and values
            are converted with ``str()``; a value of ``None`` is sent as
            ``NIL``.

    Returns:
        Whatever ``self._simple_command`` returns for the ID command.
    """
    def _quoted(text) -> str:
        # Inside an IMAP quoted string both the backslash and the double quote
        # must be backslash-escaped; the backslash has to be handled first so
        # the escapes added for quotes are not escaped a second time.
        escaped = str(text).replace("\\", "\\\\").replace('"', '\\"')
        return '"{}"'.format(escaped)

    tokens = []
    for field, value in parameters.items():
        tokens.append(_quoted(field))
        # ID values are "nstring": a missing value is NIL, not the text "None".
        tokens.append("NIL" if value is None else _quoted(value))
    return self._simple_command("ID", "({})".format(" ".join(tokens)))
# ────────────────────────────────────────────────────────────── # ──────────────────────────────────────────────────────────────
# IMAPClient avec proxy # Profils de clients IMAP réels (pour spoofing du fingerprint)
# ────────────────────────────────────────────────────────────── # ──────────────────────────────────────────────────────────────
# Identification profiles intended to mimic real IMAP clients. One of them is
# picked at random by _random_imap_id_params() and sent with the IMAP ID
# command. Keys are emitted in the order written here.
_IMAP_CLIENT_PROFILES = [
    # Mozilla Thunderbird 115 (ESR) — Windows
    {
        "name": "Thunderbird",
        "version": "115.9.0",
        "vendor": "Mozilla",
        "support-url": "https://support.mozilla.org/",
        "command": "IMAP4rev1",
        "os": "Windows NT 10.0",
        "os-version": "10.0",
    },
    # Mozilla Thunderbird 115 — macOS
    {
        "name": "Thunderbird",
        "version": "115.9.0",
        "vendor": "Mozilla",
        "support-url": "https://support.mozilla.org/",
        "command": "IMAP4rev1",
        "os": "macOS",
        "os-version": "14.4",
    },
    # Apple Mail — macOS Sonoma
    {
        "name": "Mac OS X Mail",
        "version": "16.0",
        "vendor": "Apple Inc.",
        "support-url": "https://support.apple.com/mail",
        "os": "Mac OS X",
        "os-version": "14.4",
    },
    # Apple Mail — iOS
    {
        "name": "iPhone Mail",
        "version": "17.4",
        "vendor": "Apple Inc.",
        "os": "iOS",
        "os-version": "17.4",
    },
    # Outlook for Windows (MAPI/IMAP bridge)
    {
        "name": "Microsoft Outlook",
        "version": "16.0.17531.20108",
        "vendor": "Microsoft Corporation",
        "support-url": "https://support.microsoft.com/outlook",
        "os": "Windows NT 10.0",
        "os-version": "10.0",
    },
]
def _random_imap_id_params() -> dict:
    """Return a randomly chosen IMAP client profile as a fresh dict.

    The profile is drawn from ``_IMAP_CLIENT_PROFILES``. A shallow copy is
    returned so that a caller adding or changing fields cannot alter the
    shared module-level profile used by later connections.
    """
    return dict(random.choice(_IMAP_CLIENT_PROFILES))
def _format_imap_id_args(params: dict) -> str:
    """Render ``params`` as the parenthesised argument list of an IMAP ID command."""
    def _quoted(text) -> str:
        # Backslash first, then the double quote: both are special inside an
        # IMAP quoted string.
        escaped = str(text).replace("\\", "\\\\").replace('"', '\\"')
        return '"{}"'.format(escaped)

    tokens = []
    for field, value in params.items():
        tokens.append(_quoted(field))
        # A missing value is sent as NIL rather than the text "None".
        tokens.append("NIL" if value is None else _quoted(value))
    return "({})".format(" ".join(tokens))


def send_imap_id(imap, params: Optional[dict] = None) -> None:
    """Send the IMAP ID command on an open connection to spoof the client fingerprint.

    Works with ``IMAPClient`` instances (through their underlying ``_imap``
    connection) and with imaplib-style connection objects.

    Args:
        imap: An ``IMAPClient`` or an imaplib-style connection.
        params: Fields to advertise. When ``None``, a random profile from
            ``_random_imap_id_params()`` is used.

    Any exception raised while sending is logged at debug level and ignored:
    the ID command is strictly best effort.
    """
    if params is None:
        params = _random_imap_id_params()

    try:
        # IMAPClient keeps the raw connection in ``_imap``; anything else is
        # treated as the raw connection itself.
        raw = imap._imap if isinstance(imap, IMAPClient) else imap
        if hasattr(raw, "id"):
            # e.g. a connection class that defines its own id() method
            raw.id(params)
        else:
            # Fallback: issue the raw command directly
            raw._simple_command("ID", _format_imap_id_args(params))
    except Exception as exc:
        logger.debug("IMAP ID non supporté ou ignoré : %s", exc)
class ProxyIMAPClient(IMAPClient): class ProxyIMAPClient(IMAPClient):
""" """
Sous-classe d'IMAPClient qui utilise un proxy SOCKS/HTTP. Sous-classe d'IMAPClient qui utilise un proxy SOCKS/HTTP.
@@ -239,6 +352,12 @@ class ProxyIMAPClient(IMAPClient):
"Utilisez ssl=True (port 993)." "Utilisez ssl=True (port 993)."
) )
def login(self, username: str, password: str):
    """Authenticate through the parent ``login`` and then send the IMAP ID command.

    The value returned by the parent ``login`` is handed back unchanged.
    """
    auth_response = super().login(username, password)
    # Reached only when the parent login returned; an exception raised there
    # propagates before any ID command is attempted.
    send_imap_id(self)
    return auth_response
# ────────────────────────────────────────────────────────────── # ──────────────────────────────────────────────────────────────
# Fonctions utilitaires # Fonctions utilitaires
+54 -84
View File
@@ -1,8 +1,22 @@
"""
mail_constants.py
=================
Constantes de domaine email et fabrique d'instances IMAPClient.
Architecture mail/ :
imap_proxy_reader ← bibliothèque de bas niveau (proxy, IMAP ID, extract_body, server map)
mail_constants ← cette couche : FingerprintIMAPClient + create_imap() pour les comptes directs
mail_reader_all_contacts ← logique métier haut niveau (MailReader, find_links_to_validate_…)
"""
import imaplib import imaplib
from imapclient import IMAPClient from imapclient import IMAPClient
from mail.imap_proxy_reader import send_imap_id, get_imap_server
# ── Constantes de domaine (conservées pour la compatibilité des imports externes) ──
# 邮件域名常量
DOMAIN_YAHOO = "yahoo.com" DOMAIN_YAHOO = "yahoo.com"
DOMAIN_SINA = "sina.com" DOMAIN_SINA = "sina.com"
DOMAIN_HOTMAIL = "hotmail.com" DOMAIN_HOTMAIL = "hotmail.com"
@@ -27,8 +41,6 @@ DOMAIN_GAZETA_PL = "gazeta.pl"
DOMAIN_NAVER = "naver.com" DOMAIN_NAVER = "naver.com"
DOMAIN_INBOX_LV = "inbox.lv" DOMAIN_INBOX_LV = "inbox.lv"
DOMAIN_GMX_DE = "gmx.de" DOMAIN_GMX_DE = "gmx.de"
# 垃圾邮件域名
DOMAIN_PISS_MAIL = "pissmail.com" DOMAIN_PISS_MAIL = "pissmail.com"
DOMAIN_INCEL_EMAIL = "incel.email" DOMAIN_INCEL_EMAIL = "incel.email"
DOMAIN_SHITPOSTING_EXPERT = "shitposting.expert" DOMAIN_SHITPOSTING_EXPERT = "shitposting.expert"
@@ -40,93 +52,51 @@ DOMAIN_WEB_DE = "web.de"
DOMAIN_OUTLOOK_COM = "outlook.com" DOMAIN_OUTLOOK_COM = "outlook.com"
DOMAIN_FIREMAIL_DE = "firemail.de" DOMAIN_FIREMAIL_DE = "firemail.de"
# IMAP服务器地址常量 # ── Note : les constantes IMAP_SERVER_* ont été supprimées. ───────────────────
AOL_IMAP_SERVER = "imap.aol.com" # Utiliser imap_proxy_reader.IMAP_SERVER_MAP ou imap_proxy_reader.get_imap_server(login)
IMAP_SERVER_163 = "imap.163.com" # pour obtenir le serveur IMAP correspondant à un domaine.
IMAP_SERVER_SINA = "imap.sina.com"
YAHOO_IMAP_SERVER = "imap.mail.yahoo.com"
HOTMAIL_IMAP_SERVER = "outlook.office365.com"
RAMBLER_IMAP_SERVER = "imap.rambler.ru"
ALICE_IMAP_SERVER = "in.alice.it" # ── IMAPClient avec IMAP ID spoofing ─────────────────────────────────────────
TIME_IT_SERVER = "imap.tim.it"
MARS_DTI_NE_JP_SERVER = "imap.cm.dream.jp" class FingerprintIMAPClient(IMAPClient):
NAVER_SERVER = "imap.naver.com" """
BTVM_NE_JP_SERVER = "imap.btvm.ne.jp" IMAPClient qui envoie automatiquement la commande IMAP ID (RFC 2971)
GMAIL_IMAP_SERVER = "imap.gmail.com" après chaque login(), pour usurper le fingerprint d'un vrai client mail.
ONET_IMAP_SERVER = "imap.poczta.onet.pl" """
GMX_IMAP_SERVER = "imap.gmx.com"
GMX_NET_IMAP_SERVER = "imap.gmx.net" def login(self, username: str, password: str):
GMX_AT_IMAP_SERVER = "imap.gmx.at" result = super().login(username, password)
FIREMAIL_DE_IMAP_SERVER = "imap.firemail.de" send_imap_id(self)
PISS_MAIL_IMAP_SERVER = "mail.pissmail.com" return result
INBOX_LV_IMAP_SERVER = "mail.inbox.lv"
WEB_DE_IMAP_SERVER = "imap.web.de"
GAZETA_PL_IMAP_SERVER = "imap.gazeta.pl" # ── Fabrique d'instances IMAPClient ──────────────────────────────────────────
def show_folders(imap) -> list: def show_folders(imap) -> list:
"""Retourne la liste des dossiers IMAP (compatible IMAPClient et imaplib)."""
folders = [] folders = []
isImapClient = isinstance(imap, IMAPClient) is_imap_client = isinstance(imap, IMAPClient)
if not isImapClient: if not is_imap_client:
for i in imap.list()[1]: for i in imap.list()[1]:
l = i.decode().split(' "/" ') parts = i.decode().split(' "/" ')
if len(l) > 1: if len(parts) > 1:
folders.append(l[1]) folders.append(parts[1])
if len(folders) == 0: if not folders:
folders.append('INBOX') folders.append("INBOX")
return folders
else: else:
list = imap.list_folders() for info in imap.list_folders():
for i in list: folders.append(info[-1])
name = i[-1]
folders.append(name)
return folders return folders
def create_imap(login: str): def create_imap(login: str) -> FingerprintIMAPClient:
# 创建一个IMAP4类实例 """
if DOMAIN_163 in login: Crée et retourne un FingerprintIMAPClient connecté au bon serveur IMAP
imap = IMAPClient(IMAP_SERVER_163, use_uid=True) pour le domaine de l'adresse email fournie.
elif DOMAIN_YAHOO in login:
imap = IMAPClient(YAHOO_IMAP_SERVER, use_uid=True) La résolution domaine → serveur est déléguée à get_imap_server()
elif DOMAIN_FIREMAIL_DE in login: (défini dans imap_proxy_reader, source de vérité unique).
imap = IMAPClient(FIREMAIL_DE_IMAP_SERVER, use_uid=True) """
elif DOMAIN_GMX in login or DOMAIN_GMX_FR in login or DOMAIN_GMX_US in login or DOMAIN_GMX_CH in login or DOMAIN_GMX_PT in login or DOMAIN_GMX_SG in login: server = get_imap_server(login)
imap = IMAPClient(GMX_IMAP_SERVER, use_uid=True) return FingerprintIMAPClient(server, use_uid=True)
elif DOMAIN_SINA in login:
imap = IMAPClient(IMAP_SERVER_SINA, use_uid=True)
elif DOMAIN_HOTMAIL in login or DOMAIN_OUTLOOK_COM in login:
imap = IMAPClient(HOTMAIL_IMAP_SERVER, use_uid=True)
elif DOMAIN_RAMBLER_RU in login:
imap = IMAPClient(RAMBLER_IMAP_SERVER, use_uid=True)
elif DOMAIN_BTVM_NE_JP in login:
imap = IMAPClient(BTVM_NE_JP_SERVER, use_uid=True)
elif DOMAIN_GMAIL in login:
imap = IMAPClient(GMAIL_IMAP_SERVER, use_uid=True)
elif DOMAIN_ONET in login:
imap = IMAPClient(ONET_IMAP_SERVER, use_uid=True)
elif DOMAIN_TIM_IT in login:
imap = IMAPClient(TIME_IT_SERVER, use_uid=True)
elif DOMAIN_ALICE_IT in login:
imap = IMAPClient(ALICE_IMAP_SERVER, use_uid=True)
elif DOMAIN_MARS_DTI_NE_JP in login:
imap = IMAPClient(MARS_DTI_NE_JP_SERVER, use_uid=True)
elif DOMAIN_AURORA_DTI_NE_JP in login:
imap = IMAPClient(MARS_DTI_NE_JP_SERVER, use_uid=True)
elif DOMAIN_NAVER in login:
imap = IMAPClient(NAVER_SERVER, use_uid=True)
elif DOMAIN_GMX_DE in login or DOMAIN_GMX_NET in login:
imap = IMAPClient(GMX_NET_IMAP_SERVER, use_uid=True)
elif DOMAIN_GMX_AT in login:
imap = IMAPClient(GMX_AT_IMAP_SERVER, use_uid=True)
elif DOMAIN_GAZETA_PL in login:
imap = IMAPClient(GAZETA_PL_IMAP_SERVER, use_uid=True)
elif DOMAIN_INBOX_LV in login:
imap = IMAPClient(INBOX_LV_IMAP_SERVER, use_uid=True)
elif DOMAIN_WEB_DE in login:
imap = IMAPClient(WEB_DE_IMAP_SERVER, use_uid=True)
elif DOMAIN_PISS_MAIL in login or DOMAIN_CHILD_PIZZA in login or DOMAIN_DMC_CHAT in login or DOMAIN_GENOCIDE_FUN in login or DOMAIN_HATESJE_WS in login or DOMAIN_INCEL_EMAIL in login or DOMAIN_SHITPOSTING_EXPERT in login:
imap = IMAPClient(PISS_MAIL_IMAP_SERVER, use_uid=True)
else:
imap = IMAPClient(AOL_IMAP_SERVER, use_uid=True)
return imap
+203 -130
View File
@@ -2,39 +2,59 @@ import datetime
import email import email
import logging import logging
import os import os
import random
import re import re
from concurrent.futures import ThreadPoolExecutor import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from email.header import decode_header from email.header import decode_header
from email.message import Message from typing import Union, List, Optional, Dict
from typing import Union, List, Optional
from dotenv import load_dotenv from dotenv import load_dotenv
from imapclient import IMAPClient
from db.mongo_manager import MONGO_STORE_MANAGER from db.mongo_manager import MONGO_STORE_MANAGER
from excel_reader import read_contacts from excel_reader import read_contacts
from mail.mail_constants import DOMAIN_HOTMAIL, create_imap from mail.mail_constants import DOMAIN_HOTMAIL, create_imap, show_folders
from mail.imap_proxy_reader import ProxyIMAPClient, ProxyConfig, get_imap_server from mail.imap_proxy_reader import (
ProxyIMAPClient, ProxyConfig, get_imap_server,
extract_body,
VALIDATION_URL_SUBJECT_FR, VALIDATION_URL_SUBJECT_EN,
VALIDATION_URL_REGEX, DATE_FORMAT,
)
from imapclient import IMAPClient
from models.ReserveResultPojo import ReserveResultPojo from models.ReserveResultPojo import ReserveResultPojo
from models.mail_pojo import MailPojo, MailAddress from models.mail_pojo import MailPojo, MailAddress
# Charger les variables d'environnement depuis .env # Charger les variables d'environnement depuis .env
load_dotenv() load_dotenv()
# 定义常量 # ── Constantes locales ────────────────────────────────────────────────────────
VALIDATION_URL_SUBJECT_FR = 'Validation de votre demande de rendez-vous' # VALIDATION_URL_SUBJECT_FR, VALIDATION_URL_SUBJECT_EN, VALIDATION_URL_REGEX,
VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request' # DATE_FORMAT sont importés depuis imap_proxy_reader (source de vérité unique).
VALIDATION_URL_REGEX = r"https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+"
PART_VALIDATION_URL_REGEX = r"client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+" PART_VALIDATION_URL_REGEX = r"client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+"
HERMES_EMAIL = "no-reply@hermes.com" HERMES_EMAIL = "no-reply@hermes.com"
EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b' EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
# 日期格式
DATE_FORMAT = "%d-%b-%Y"
# Timeouts GMX (en secondes) # Timeouts GMX (en secondes)
IMAP_SOCKET_TIMEOUT = 300 # timeout socket pour chaque opération IMAP IMAP_SOCKET_TIMEOUT = 300 # timeout socket pour chaque opération IMAP
FUTURE_TIMEOUT = 600 # durée max allouée à la lecture d'une boîte mail FUTURE_TIMEOUT = 600 # durée max allouée à la lecture d'une boîte mail
# Maximum number of concurrent threads per domain group (prevents triggering
# the risk controls of a single mail provider)
MAX_WORKERS_PER_DOMAIN: Dict[str, int] = {
    "gmx": 80,
    "aol": 5,
    "gmail": 3,
    "yahoo": 5,
    "outlook": 5,
    "hotmail": 5,
    "firemail": 5,
    "inbox.lv": 5,
    "default": 5,
}

# Minimum interval (in minutes) between two reads of the same mailbox, to
# avoid frequent repeated logins
MAIL_READ_MIN_INTERVAL_MINUTES = 15
# GMX域名列表(用于判断是否需要使用代理) # GMX域名列表(用于判断是否需要使用代理)
GMX_DOMAINS = ( GMX_DOMAINS = (
"gmx.com", "gmx.net", "gmx.de", "gmx.at", "gmx.com", "gmx.net", "gmx.de", "gmx.at",
@@ -42,8 +62,9 @@ GMX_DOMAINS = (
) )
# 需要通过代理读取的域名列表 # 需要通过代理读取的域名列表
# PROXY_DOMAINS = GMX_DOMAINS + ("inbox.lv",) PROXY_DOMAINS = GMX_DOMAINS + ("yahoo.com",)
PROXY_DOMAINS = GMX_DOMAINS # PROXY_DOMAINS = GMX_DOMAINS + ("yahoo.com",)
# PROXY_DOMAINS = GMX_DOMAINS
def is_gmx_account(login: str) -> bool: def is_gmx_account(login: str) -> bool:
@@ -55,6 +76,18 @@ def is_proxy_account(login: str) -> bool:
"""判断邮箱是否需要通过代理读取(GMX 或 inbox.lv""" """判断邮箱是否需要通过代理读取(GMX 或 inbox.lv"""
return any(d in login.lower() for d in PROXY_DOMAINS) return any(d in login.lower() for d in PROXY_DOMAINS)
def get_domain_group(login: str, limits: Optional[Dict[str, int]] = None) -> str:
    """Map an e-mail address to the domain-group key used for throttling.

    Examples: ``"user@gmx.net"`` -> ``"gmx"``, ``"user@aol.com"`` -> ``"aol"``.

    Args:
        login: The e-mail address (matching is case-insensitive).
        limits: Mapping whose keys are the known group names. Defaults to
            ``MAX_WORKERS_PER_DOMAIN``.

    Returns:
        The first key of ``limits`` (other than ``"default"``) contained in
        the domain part of ``login``, or ``"default"`` when none matches.
    """
    if limits is None:
        limits = MAX_WORKERS_PER_DOMAIN

    # Only the part after the last "@" is inspected, so a local part such as
    # "aolfan" in "aolfan@gmail.com" cannot select the wrong group. Without an
    # "@" the whole string is used.
    domain = login.lower().rpartition("@")[2]
    for key in limits:
        if key != "default" and key in domain:
            return key
    return "default"
# 邮箱列表(简化为常量) # 邮箱列表(简化为常量)
REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com" REDIRECTION_MAILS = "appointment2022@aol.com, 
chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com"
@@ -95,39 +128,29 @@ class MailReader:
"""邮件读取器类""" """邮件读取器类"""
def __init__(self, login: str, password: str, proxy: Optional[ProxyConfig] = None, def __init__(self, login: str, password: str, proxy: Optional[ProxyConfig] = None,
failed_gmx_list: Optional[List[str]] = None): failed_gmx_list: Optional[List[str]] = None,
delay_range: tuple = (1.0, 5.0)):
self.login = login self.login = login
self.password = password self.password = password
self.proxy = proxy self.proxy = proxy
self.failed_gmx_list = failed_gmx_list if failed_gmx_list is not None else [] self.failed_gmx_list = failed_gmx_list if failed_gmx_list is not None else []
self.delay_range = delay_range # (min_seconds, max_seconds) 随机延迟范围
@staticmethod
def show_folders(imap) -> List[str]:
"""获取邮箱文件夹列表"""
folders = []
is_imap_client = isinstance(imap, IMAPClient)
if not is_imap_client:
# 处理非IMAPClient对象
for i in imap.list()[1]:
l = i.decode().split(' "/" ')
folders.append(l[1])
else:
# 处理IMAPClient对象
folder_list = imap.list_folders()
for i in folder_list:
name = i[-1]
folders.append(name)
return folders
def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]: def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]:
"""读取邮件""" """读取邮件(含随机延迟和读取时间记录)"""
# 随机延迟,模拟人工节奏,降低被识别为机器人的概率
_delay = random.uniform(*self.delay_range)
time.sleep(_delay)
# ── GMX / inbox.lv 账户 → 使用代理连接(失败自动重试最多3次)── # ── GMX / inbox.lv 账户 → 使用代理连接(失败自动重试最多3次)──
if is_proxy_account(self.login) and self.proxy is not None: if is_proxy_account(self.login) and self.proxy is not None:
return self._read_emails_with_proxy_retry(mails_messages) result = self._read_emails_with_proxy_retry(mails_messages)
else: else:
return self._read_emails_internal(create_imap(self.login), mails_messages) result = self._read_emails_internal(create_imap(self.login), mails_messages)
# 记录本次读取时间,供下次调用的 need_to_check_email 判断间隔
MONGO_STORE_MANAGER.update_mail_read_time(self.login)
return result
def _read_emails_with_proxy_retry( def _read_emails_with_proxy_retry(
self, self,
@@ -175,8 +198,8 @@ class MailReader:
mail_list = [] mail_list = []
print("read mails from {}".format(self.login)) print("read mails from {}".format(self.login))
# 获取文件夹列表 # 获取文件夹列表(委托给 mail_constants.show_folders
folder_list = self.show_folders(imap) folder_list = show_folders(imap)
# 处理每个文件夹 # 处理每个文件夹
for folder in folder_list: for folder in folder_list:
@@ -204,28 +227,36 @@ class MailReader:
return mail_list return mail_list
def _get_messages_from_folder(self, imap, subject: str, folder: str = "INBOX") -> List[MailPojo]: def _get_messages_from_folder(self, imap, subject: str, folder: str = "INBOX") -> List[MailPojo]:
"""从指定文件夹获取邮件(传统IMAP方式)""" """从指定文件夹获取邮件(传统IMAP方式,批量fetch减少往返次数"""
imap.select(folder) imap.select(folder)
mail_messages = [] mail_messages = []
# 搜索邮件 # 搜索符合条件的所有邮件ID
search_query = '(SUBJECT "{}" SINCE "{}")'.format(subject, datetime.datetime.today().strftime(DATE_FORMAT)) search_query = '(SUBJECT "{}" SINCE "{}")'.format(subject, datetime.datetime.today().strftime(DATE_FORMAT))
typ, data = imap.search(None, search_query) typ, data = imap.search(None, search_query)
for i in data[0].split(): ids = data[0].split()
try: if not ids:
# 获取邮件内容 return mail_messages
res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)")
# 解析邮件 # 批量fetch:一次请求取回所有匹配邮件,减少 N 次往返为 1 次
for response in msg: id_list = b",".join(ids)
if isinstance(response, tuple): try:
res, msg_list = imap.fetch(id_list, "(RFC822)")
except Exception as error:
print("Batch fetch error in folder {}: {}".format(folder, error))
return mail_messages
for response in msg_list:
if not isinstance(response, tuple):
continue
try:
email_message = email.message_from_bytes(response[1]) email_message = email.message_from_bytes(response[1])
# 解码主题 # 解码主题
subject, subject_encoded = decode_header(email_message["Subject"])[0] subject_decoded, subject_encoded = decode_header(email_message["Subject"])[0]
if isinstance(subject, bytes): if isinstance(subject_decoded, bytes):
subject = subject.decode(subject_encoded) subject_decoded = subject_decoded.decode(subject_encoded)
# 解码发件人地址 # 解码发件人地址
from_address = find_from_mail(decode_header(email_message.get("From"))) from_address = find_from_mail(decode_header(email_message.get("From")))
@@ -236,15 +267,15 @@ class MailReader:
print("Email:", self.login) print("Email:", self.login)
print("From:", from_address) print("From:", from_address)
print("To:", to_email) print("To:", to_email)
print("Subject:", subject) print("Subject:", subject_decoded)
# 获取邮件正文 # 获取邮件正文(委托给 imap_proxy_reader.extract_body
body = self._extract_body(email_message) body = extract_body(email_message)
# 检查是否是预约验证邮件 # 检查是否是预约验证邮件
if VALIDATION_URL_SUBJECT_FR in subject or VALIDATION_URL_SUBJECT_EN in subject: if VALIDATION_URL_SUBJECT_FR in subject_decoded or VALIDATION_URL_SUBJECT_EN in subject_decoded:
mail = MailPojo( mail = MailPojo(
subject=subject, subject=subject_decoded,
body=body, body=body,
from_address=from_address from_address=from_address
) )
@@ -262,30 +293,6 @@ class MailReader:
return mail_messages return mail_messages
def _extract_body(self, email_message: Message) -> str:
"""提取邮件正文"""
body = ""
# 遍历邮件部分
for part in email_message.walk():
try:
content_type = part.get_content_type()
if content_type == "text/html":
# 处理HTML内容
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
elif content_type == "text/plain":
# 处理纯文本内容
payload = part.get_payload()
if payload:
body += payload
except Exception as error:
print("Error extracting body part: {}".format(error))
return body
def _get_messages_from_folder_for_imapclient(self, imap, folder: str = "INBOX") -> List[MailPojo]: def _get_messages_from_folder_for_imapclient(self, imap, folder: str = "INBOX") -> List[MailPojo]:
"""从指定文件夹获取邮件(IMAPClient方式)""" """从指定文件夹获取邮件(IMAPClient方式)"""
mail_messages = [] mail_messages = []
@@ -308,8 +315,8 @@ class MailReader:
email_message = email.message_from_bytes(message_data[b'RFC822']) email_message = email.message_from_bytes(message_data[b'RFC822'])
# 获取发件人和主题 # 获取发件人和主题
from_address = email_message.get('FROM') from_address = email_message.get('FROM') or ""
subject = email_message.get('subject') subject = email_message.get('subject') or ""
# 检查是否是Hermes邮件 # 检查是否是Hermes邮件
hermes_mail_address = "no-reply@hermes.com" hermes_mail_address = "no-reply@hermes.com"
@@ -317,8 +324,8 @@ class MailReader:
"outlook.com" in from_address or "outlook.com" in from_address or
"hotmail" in from_address): "hotmail" in from_address):
# 提取邮件正文 # 提取邮件正文(委托给 imap_proxy_reader.extract_body
body = self._extract_body_for_imapclient(email_message) body = extract_body(email_message)
# 检查是否是预约验证邮件 # 检查是否是预约验证邮件
if (VALIDATION_URL_SUBJECT_FR in subject or if (VALIDATION_URL_SUBJECT_FR in subject or
@@ -351,24 +358,6 @@ class MailReader:
return mail_messages return mail_messages
def _extract_body_for_imapclient(self, email_message: Message) -> str:
"""提取IMAPClient邮件正文"""
body = ""
for part in email_message.walk():
content_type = part.get_content_type()
if content_type == "text/html":
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
elif content_type == "text/plain":
payload = part.get_payload()
if payload:
body += payload
return body
# 邮件处理相关函数 # 邮件处理相关函数
def find_item_by_url(url: str, successful_items) -> Union[None, ReserveResultPojo]: def find_item_by_url(url: str, successful_items) -> Union[None, ReserveResultPojo]:
@@ -405,57 +394,120 @@ def need_to_valid_url(url: str, item: Union[ReserveResultPojo, None]) -> bool:
def need_to_check_email(mail: str, successful_items) -> bool: def need_to_check_email(mail: str, successful_items) -> bool:
"""判断是否需要检查邮件""" """
判断是否需要检查邮件。
两种情况跳过:
1. 该邮箱已有成功验证记录(原逻辑)
2. 距上次读取不足 MAIL_READ_MIN_INTERVAL_MINUTES 分钟(防频繁重复登录)
"""
print("successful_items size is " + str(len(successful_items))) print("successful_items size is " + str(len(successful_items)))
# 过滤已验证的项目 # 原逻辑:已有成功验证则跳过
filtered_items = [item for item in successful_items if item.email == mail] filtered_items = [item for item in successful_items if item.email == mail]
# 检查是否有已验证的项目
validated_items = [item for item in filtered_items validated_items = [item for item in filtered_items
if item.url_validated is not None and item.url_validated is True] if item.url_validated is not None and item.url_validated is True]
if len(validated_items) > 0:
return False
return len(validated_items) == 0 # 新逻辑:距上次读取时间太短则跳过
last_read = MONGO_STORE_MANAGER.get_last_mail_read_time(mail)
if last_read is not None:
elapsed_minutes = (datetime.datetime.utcnow() - last_read).total_seconds() / 60
if elapsed_minutes < MAIL_READ_MIN_INTERVAL_MINUTES:
print("[跳过] {} 距上次读取仅 {:.1f} 分钟,未达到最小间隔 {} 分钟".format(
mail, elapsed_minutes, MAIL_READ_MIN_INTERVAL_MINUTES))
return False
return True
def find_links_to_validate_from_mail_list( def find_links_to_validate_from_mail_list(
mail_list: List[MailAddress], mail_list: List[MailAddress],
logger, logger,
proxy: Optional[ProxyConfig] = None, proxy: Optional[ProxyConfig] = None,
proxy_pool: Optional[List[ProxyConfig]] = None,
) -> List[str]: ) -> List[str]:
"""从邮件列表中查找需要验证的链接,返回读取失败的GMX账户列表""" """
从邮件列表中查找需要验证的链接,返回读取失败的GMX账户列表。
参数
----
proxy : 单一代理(GMX专用,兼容旧调用方式)
proxy_pool : 代理列表(非GMX账号也会轮换使用;若为空则非GMX走直连)
"""
if not mail_list: if not mail_list:
return [] return []
# 检查时间前开始检查邮件
contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list() contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list()
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
mails_messages = [] mails_messages = []
failed_gmx: List[str] = [] failed_gmx: List[str] = []
# 使用线程池处理邮件 # ── 按域名分组,每组使用独立线程池限流 ────────────────────────────
with ThreadPoolExecutor(max_workers=100) as executor: # domain_group → [(MailAddress, ProxyConfig|None), ...]
futures = [] grouped: Dict[str, List[tuple]] = defaultdict(list)
for mail in mail_list: for idx, mail in enumerate(mail_list):
# 检查是否需要读取邮件 if not need_to_check_email(mail.mail, successful_items):
if need_to_check_email(mail.mail, successful_items): continue
mail_reader = MailReader(mail.mail, mail.password, proxy=proxy,
failed_gmx_list=failed_gmx) # 为账号分配代理
if is_proxy_account(mail.mail):
# GMX / inbox.lv → 使用专用 GMX 代理
assigned_proxy = proxy
elif proxy_pool:
# 非GMX + 有代理池 → 按索引轮换分配
assigned_proxy = proxy_pool[idx % len(proxy_pool)]
else:
# 无代理池 → 直连
assigned_proxy = None
group_key = get_domain_group(mail.mail)
grouped[group_key].append((mail, assigned_proxy))
# ── 每个域名分组启动独立线程池 ────────────────────────────────────
# future → mail address,用于进度显示
future_to_mail: Dict[object, str] = {}
executors = []
for group_key, items in grouped.items():
max_w = MAX_WORKERS_PER_DOMAIN.get(group_key, MAX_WORKERS_PER_DOMAIN["default"])
executor = ThreadPoolExecutor(max_workers=max_w)
executors.append(executor)
print("[限流] 域名组 '{}': {} 账号,max_workers={}".format(
group_key, len(items), max_w))
for mail, assigned_proxy in items:
mail_reader = MailReader(
mail.mail,
mail.password,
proxy=assigned_proxy,
failed_gmx_list=failed_gmx,
)
future = executor.submit(mail_reader.read_emails, mails_messages) future = executor.submit(mail_reader.read_emails, mails_messages)
futures.append(future) future_to_mail[future] = mail.mail
# 等待所有任务完成 # ── 等待所有任务完成,然后关闭线程池 ─────────────────────────────
for future in futures: total = len(future_to_mail)
completed = 0
for future in as_completed(future_to_mail):
mail_addr = future_to_mail[future]
completed += 1
try: try:
future.result(timeout=FUTURE_TIMEOUT) future.result(timeout=FUTURE_TIMEOUT)
print("[进度] {}/{} {}".format(completed, total, mail_addr))
except TimeoutError: except TimeoutError:
print("⏱️ Timeout ({} s) dépassé pour une boîte mail — lecture ignorée.".format(FUTURE_TIMEOUT)) print("[进度] {}/{} {} — Timeout ({} s), lecture ignorée.".format(
completed, total, mail_addr, FUTURE_TIMEOUT))
except Exception as e: except Exception as e:
print("Error processing mail: {},login: {}, password: {}".format(e,mail.mail, mail.password)) print("[进度] {}/{} {} — Erreur: {}".format(completed, total, mail_addr, e))
# ── Résumé des comptes proxy en échec ────────────────────── for executor in executors:
executor.shutdown(wait=False)
# ── 输出代理账号读取摘要 ──────────────────────────────────────────
if failed_gmx: if failed_gmx:
print("\n[Proxy] ⚠️ {} compte(s) non lus (GMX / inbox.lv) :".format(len(failed_gmx))) print("\n[Proxy] ⚠️ {} compte(s) non lus (GMX / inbox.lv) :".format(len(failed_gmx)))
for addr in failed_gmx: for addr in failed_gmx:
@@ -463,10 +515,9 @@ def find_links_to_validate_from_mail_list(
else: else:
print("\n[Proxy] ✅ Tous les comptes GMX / inbox.lv ont été lus avec succès.") print("\n[Proxy] ✅ Tous les comptes GMX / inbox.lv ont été lus avec succès.")
# 刷新成功的项目 # ── 处理邮件中的验证链接 ──────────────────────────────────────────
_refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
# 处理邮件中的链接
for mail in mails_messages: for mail in mails_messages:
match = re.search(VALIDATION_URL_REGEX, mail.body) match = re.search(VALIDATION_URL_REGEX, mail.body)
if match: if match:
@@ -500,7 +551,8 @@ if __name__ == '__main__':
# file_name="~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx") # file_name="~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx")
# file_name="~/Desktop/contact_list_yahoo_100_20_04.xlsx") # file_name="~/Desktop/contact_list_yahoo_100_20_04.xlsx")
# file_name="~/Desktop/contact_yahoo_5.xlsx") # file_name="~/Desktop/contact_yahoo_5.xlsx")
file_name="~/Desktop/contact_list_2026-04-22.xlsx") # file_name="~/Desktop/contact_list_2026-04-24_yahoo_50.xlsx")
file_name="~/Desktop/contact_list_2026-04-23.xlsx")
# file_name="~/Desktop/contact_list_2026-04-11.xlsx") # file_name="~/Desktop/contact_list_2026-04-11.xlsx")
# file_name="~/Desktop/contact_list_2026-04-17.xlsx") # file_name="~/Desktop/contact_list_2026-04-17.xlsx")
# file_name="~/Desktop/contact_list_inbox_100_14_04.xlsx") # file_name="~/Desktop/contact_list_inbox_100_14_04.xlsx")
@@ -532,7 +584,7 @@ if __name__ == '__main__':
_to_add = False _to_add = False
if _to_add: if _to_add:
filter_mail.append(mail_pojo) filter_mail.append(mail_pojo)
# filter_mail = [MailAddress("minnakan@firemail.de", "Yjn8nQ0sZ")] # filter_mail = [MailAddress("pishikmamn@gmx.de", "53OBns2jAXE")]
# ── Mode de lecture : GMX_ONLY=true → uniquement les comptes GMX ── # ── Mode de lecture : GMX_ONLY=true → uniquement les comptes GMX ──
gmx_only = os.environ.get("GMX_ONLY", "false").strip().lower() == "true" gmx_only = os.environ.get("GMX_ONLY", "false").strip().lower() == "true"
@@ -550,8 +602,29 @@ if __name__ == '__main__':
username=os.environ.get("GMX_PROXY_USERNAME"), username=os.environ.get("GMX_PROXY_USERNAME"),
password=os.environ.get("GMX_PROXY_PASSWORD"), password=os.environ.get("GMX_PROXY_PASSWORD"),
) )
# Proxy pool for non-GMX accounts: several proxies may be configured and are
# used in rotation; leave PROXY_POOL_HOSTS empty for a direct connection.
# Format: PROXY_POOL_HOSTS="host1:port1,host2:port2" (same proxy type and
# credentials as GMX_PROXY).
_proxy_pool_raw = os.environ.get("PROXY_POOL_HOSTS", "").strip()
non_gmx_proxy_pool: Optional[List[ProxyConfig]] = None
if _proxy_pool_raw:
    # Type and credentials are identical for every pool entry: read them once
    # instead of on each loop iteration.
    _pool_type = os.environ.get("GMX_PROXY_TYPE", "SOCKS5")
    _pool_username = os.environ.get("GMX_PROXY_USERNAME")
    _pool_password = os.environ.get("GMX_PROXY_PASSWORD")
    non_gmx_proxy_pool = []
    for entry in _proxy_pool_raw.split(","):
        entry = entry.strip()
        if not entry:
            continue  # tolerate trailing or doubled commas
        # Split on the LAST colon so the host part may itself contain colons.
        _h, _sep, _p = entry.rpartition(":")
        try:
            _port = int(_p)
        except ValueError:
            _port = None
        if not _sep or not _h or _port is None or not 0 < _port <= 65535:
            # A single malformed entry must not abort the whole run: report
            # it and keep the remaining proxies.
            print("[Proxy] PROXY_POOL_HOSTS: ignoring malformed entry '{}' "
                  "(expected host:port)".format(entry))
            continue
        non_gmx_proxy_pool.append(ProxyConfig(
            host=_h,
            port=_port,
            proxy_type=_pool_type,
            username=_pool_username,
            password=_pool_password,
        ))
    if not non_gmx_proxy_pool:
        # No usable entry: fall back to None, the documented "direct
        # connection" value, rather than handing over an empty pool.
        # NOTE(review): assumes the callee treats None as "no pool" — confirm.
        non_gmx_proxy_pool = None
# 处理邮件 # 处理邮件
failed = find_links_to_validate_from_mail_list(filter_mail, logger, proxy=gmx_proxy) failed = find_links_to_validate_from_mail_list(
filter_mail, logger, proxy=gmx_proxy, proxy_pool=non_gmx_proxy_pool
)
# ── Afficher les comptes GMX non lus ───────────────────── # ── Afficher les comptes GMX non lus ─────────────────────
if failed: if failed:
+2 -1
View File
@@ -104,7 +104,8 @@ if __name__ == '__main__':
# file_list = ['~/Desktop/contact_list_inbox_lv_100.xlsx'] # file_list = ['~/Desktop/contact_list_inbox_lv_100.xlsx']
# file_list = ['~/Desktop/contact_list_yahoo_100_20_04.xlsx'] # file_list = ['~/Desktop/contact_list_yahoo_100_20_04.xlsx']
# file_list = ['~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx'] # file_list = ['~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx']
file_list = ['~/Desktop/contact_list_2026-04-21.xlsx'] file_list = ['~/Desktop/contact_list_2026-04-23.xlsx']
# file_list = ['~/Desktop/contact_list_2026-04-24_yahoo_50.xlsx']
# file_list = ['~/Desktop/reste_inbox_lv.xlsx'] # file_list = ['~/Desktop/reste_inbox_lv.xlsx']
# file_list = ['~/Desktop/contact_list_2024-09-02_firemail_de_100.xlsx'] # file_list = ['~/Desktop/contact_list_2024-09-02_firemail_de_100.xlsx']
# file_list = ['~/Desktop/contact_list_inbox_100_14_04.xlsx'] # file_list = ['~/Desktop/contact_list_inbox_100_14_04.xlsx']