improvement while reading mails

This commit is contained in:
2026-04-24 18:20:41 +02:00
parent 64e47e05e7
commit 3a3a36082b
6 changed files with 532 additions and 277 deletions
+26
View File
@@ -2,6 +2,7 @@ import datetime
import logging
import time
import os
from typing import Optional
from pymongo import MongoClient
@@ -22,6 +23,7 @@ DESTINATION_EMAIL_LIST = "DESTINATION_EMAIL_LIST"
LINKS_TO_VALIDATE = "LINKS_TO_VALIDATE"
INVALID_EMAIL_LIST = "INVALID_EMAIL_LIST"
CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP"
MAIL_READ_LOG = "MAIL_READ_LOG" # 记录每个邮箱上次读取时间
class MongoDbManager:
@@ -264,5 +266,29 @@ class MongoDbManager:
collection_to_use = self.db[LINKS_TO_VALIDATE]
collection_to_use.delete_one({"_id": linkPojo.email})
# ── Mail read-time tracking ────────────────────────────────────
def get_last_mail_read_time(self, mail: str) -> Optional[datetime.datetime]:
    """Return when ``mail`` was last read, or ``None`` if no usable record exists.

    The value is whatever is stored under ``last_read_at`` in the
    ``MAIL_READ_LOG`` collection for the document whose ``_id`` is ``mail``.
    Any error raised while querying is logged as a warning and reported as
    "never read" (``None``) rather than propagated.
    """
    timestamp = None
    try:
        record = self.db[MAIL_READ_LOG].find_one({"_id": mail})
        if record and "last_read_at" in record:
            timestamp = record["last_read_at"]
    except Exception as err:
        # Best-effort lookup: a storage failure must not stop mail reading.
        self.logger.warning("get_last_mail_read_time error: %s", err)
    return timestamp
def update_mail_read_time(self, mail: str) -> None:
    """Record "now" (naive UTC) as the last-read time of ``mail``.

    The document keyed by ``mail`` in the ``MAIL_READ_LOG`` collection is
    replaced, or inserted when absent (``upsert=True``).  Failures are
    logged as warnings and otherwise ignored.
    """
    selector = {"_id": mail}
    try:
        stamped = {"_id": mail, "last_read_at": datetime.datetime.utcnow()}
        self.db[MAIL_READ_LOG].replace_one(selector, stamped, upsert=True)
    except Exception as err:
        # Best-effort bookkeeping: never let a logging write abort the caller.
        self.logger.warning("update_mail_read_time error: %s", err)
MONGO_STORE_MANAGER = MongoDbManager()
+66
View File
@@ -0,0 +1,66 @@
"""
mail/
=====
IMAP reading package for the appointment_request bot.

Architecture (from the lowest level to the highest level):

  imap_proxy_reader — low-level library, no internal dependencies
      ProxyConfig               proxy configuration dataclass (SOCKS5/SOCKS4/HTTP)
      ProxyIMAPClient           IMAPClient that connects through a proxy
      get_imap_server(login)    domain → IMAP server resolution
      extract_body(msg)         extraction of the HTML/text body of an email
      send_imap_id(imap)        client fingerprint spoofing (RFC 2971)
      VALIDATION_URL_*          Hermes constants (source of truth)
      IMAP_SERVER_MAP           domain → server table

  mail_constants — IMAPClient with fingerprint + create_imap() factory
      FingerprintIMAPClient     IMAPClient that sends IMAP ID automatically after login
      create_imap(login)        factory → FingerprintIMAPClient on the right server
      show_folders(imap)        folder listing (IMAPClient or imaplib)

  mail_reader_all_contacts — high-level business logic
      MailReader                reads the emails of one account (direct or via proxy)
      find_links_to_validate_from_mail_list()   main entry point
"""
from mail.imap_proxy_reader import (
ProxyConfig,
ProxyIMAPClient,
get_imap_server,
extract_body,
send_imap_id,
VALIDATION_URL_SUBJECT_FR,
VALIDATION_URL_SUBJECT_EN,
VALIDATION_URL_REGEX,
IMAP_SERVER_MAP,
)
from mail.mail_constants import (
FingerprintIMAPClient,
create_imap,
show_folders,
)
from mail.mail_reader_all_contacts import (
MailReader,
find_links_to_validate_from_mail_list,
)
__all__ = [
# imap_proxy_reader
"ProxyConfig",
"ProxyIMAPClient",
"get_imap_server",
"extract_body",
"send_imap_id",
"VALIDATION_URL_SUBJECT_FR",
"VALIDATION_URL_SUBJECT_EN",
"VALIDATION_URL_REGEX",
"IMAP_SERVER_MAP",
# mail_constants
"FingerprintIMAPClient",
"create_imap",
"show_folders",
# mail_reader_all_contacts
"MailReader",
"find_links_to_validate_from_mail_list",
]
+121 -2
View File
@@ -26,6 +26,8 @@ from dataclasses import dataclass, field
from email.message import Message
from typing import List, Optional, Tuple
import random
import socks
from dotenv import load_dotenv
from imapclient import IMAPClient
@@ -40,7 +42,7 @@ VALIDATION_URL_SUBJECT_FR = "Validation de votre demande de rendez-vous"
VALIDATION_URL_SUBJECT_EN = "Please confirm your appointment request"
VALIDATION_URL_REGEX = (
r"https:\/\/rendezvousparis\.hermes\.com"
r"\/client\/register\/[A-Z0-9]+\/validate\.code=[A-Z0-9]+"
r"\/client\/register\/[A-Z0-9]+\/validate[?.]code=[A-Z0-9]+"
)
DATE_FORMAT = "%d-%b-%Y"
@@ -198,11 +200,122 @@ class ProxyIMAP4_TLS(imaplib.IMAP4):
# Close the connection by delegating directly to imaplib.IMAP4.shutdown().
# NOTE(review): the base-class method is called explicitly rather than via
# super(); presumably intentional for the proxy socket set up elsewhere in
# this class — confirm before changing.
def shutdown(self) -> None:
imaplib.IMAP4.shutdown(self)
def id(self, parameters: dict) -> tuple:
    """Send the IMAP ID command (RFC 2971) and return the raw server reply.

    Args:
        parameters: Field/value pairs to announce, e.g.
            ``{"name": "Thunderbird", "version": "115.0"}``.  Keys and
            values are converted with ``str()`` and sent as IMAP quoted
            strings.

    Returns:
        The ``(typ, data)`` tuple produced by ``_simple_command``.
    """
    # imaplib looks every command up in its Commands table before sending;
    # make sure "ID" is known even if no other library registered it.
    imaplib.Commands.setdefault("ID", ("NONAUTH", "AUTH", "SELECTED"))

    def _quote(token) -> str:
        # Inside an IMAP quoted string both the backslash and the double
        # quote must be backslash-escaped; escape backslashes first so the
        # ones added for quotes are not doubled.
        text = str(token).replace("\\", "\\\\").replace('"', '\\"')
        return '"{}"'.format(text)

    args = " ".join(
        _quote(token)
        for pair in parameters.items()
        for token in pair
    )
    return self._simple_command("ID", "({})".format(args))
# ──────────────────────────────────────────────────────────────
# IMAPClient avec proxy
# Profils de clients IMAP réels (pour spoofing du fingerprint)
# ──────────────────────────────────────────────────────────────
# Candidate IMAP ID parameter sets (field → value), each imitating a real
# mail client.  _random_imap_id_params() picks one of these at random.
# NOTE(review): version strings are hard-coded and will age; refresh them
# periodically if the fingerprint is meant to stay plausible.
_IMAP_CLIENT_PROFILES = [
# Mozilla Thunderbird 115 (ESR) — Windows
{
"name": "Thunderbird",
"version": "115.9.0",
"vendor": "Mozilla",
"support-url": "https://support.mozilla.org/",
"command": "IMAP4rev1",
"os": "Windows NT 10.0",
"os-version": "10.0",
},
# Mozilla Thunderbird 115 — macOS
{
"name": "Thunderbird",
"version": "115.9.0",
"vendor": "Mozilla",
"support-url": "https://support.mozilla.org/",
"command": "IMAP4rev1",
"os": "macOS",
"os-version": "14.4",
},
# Apple Mail — macOS Sonoma
{
"name": "Mac OS X Mail",
"version": "16.0",
"vendor": "Apple Inc.",
"support-url": "https://support.apple.com/mail",
"os": "Mac OS X",
"os-version": "14.4",
},
# Apple Mail — iOS
{
"name": "iPhone Mail",
"version": "17.4",
"vendor": "Apple Inc.",
"os": "iOS",
"os-version": "17.4",
},
# Outlook for Windows (MAPI/IMAP bridge)
{
"name": "Microsoft Outlook",
"version": "16.0.17531.20108",
"vendor": "Microsoft Corporation",
"support-url": "https://support.microsoft.com/outlook",
"os": "Windows NT 10.0",
"os-version": "10.0",
},
]
def _random_imap_id_params() -> dict:
    """Return a randomly chosen IMAP client profile as a fresh dict.

    A shallow copy is returned so that a caller adding or changing fields
    cannot alter the shared entries of ``_IMAP_CLIENT_PROFILES``.
    """
    return dict(random.choice(_IMAP_CLIENT_PROFILES))
def send_imap_id(imap, params: Optional[dict] = None) -> None:
    """Send the IMAP ID command (RFC 2971) to announce a client identity.

    Works with both ``IMAPClient`` instances (the wrapped imaplib object is
    reached through ``imap._imap``) and plain imaplib-style connections.
    If the underlying connection object exposes an ``id`` method it is
    used; otherwise the command is issued through ``_simple_command``.

    This is best-effort: any exception is logged at debug level and
    swallowed, so a server rejecting ID never breaks the session.

    Args:
        imap: ``IMAPClient`` or imaplib-style connection, already connected.
        params: Field/value pairs to send.  When ``None`` a random profile
            from ``_random_imap_id_params()`` is used.
    """
    if params is None:
        params = _random_imap_id_params()
    try:
        # IMAPClient keeps the imaplib connection in its private _imap
        # attribute; everything below operates on that raw connection.
        raw = imap._imap if isinstance(imap, IMAPClient) else imap
        if hasattr(raw, "id"):
            raw.id(params)
        else:
            raw._simple_command("ID", _imap_id_argument_list(params))
    except Exception as exc:
        logger.debug("IMAP ID non supporté ou ignoré : %s", exc)


def _imap_id_argument_list(params: dict) -> str:
    """Format ``params`` as the parenthesised quoted-string list ID expects."""
    quoted = (
        # Escape backslashes before quotes so the escapes are not doubled.
        '"{}"'.format(str(token).replace("\\", "\\\\").replace('"', '\\"'))
        for pair in params.items()
        for token in pair
    )
    return "({})".format(" ".join(quoted))
class ProxyIMAPClient(IMAPClient):
"""
Sous-classe d'IMAPClient qui utilise un proxy SOCKS/HTTP.
@@ -239,6 +352,12 @@ class ProxyIMAPClient(IMAPClient):
"Utilisez ssl=True (port 993)."
)
def login(self, username: str, password: str):
    """Authenticate, then immediately send the IMAP ID command.

    Returns whatever the parent ``login()`` returned.
    """
    login_response = super().login(username, password)
    send_imap_id(self)
    return login_response
# ──────────────────────────────────────────────────────────────
# Fonctions utilitaires
+87 -117
View File
@@ -1,132 +1,102 @@
"""
mail_constants.py
=================
Email domain constants and factory for IMAPClient instances.

Architecture of mail/:
  imap_proxy_reader        ← low-level library (proxy, IMAP ID, extract_body, server map)
  mail_constants           ← this layer: FingerprintIMAPClient + create_imap() for direct accounts
  mail_reader_all_contacts ← high-level business logic (MailReader, find_links_to_validate_…)
"""
import imaplib
from imapclient import IMAPClient
from mail.imap_proxy_reader import send_imap_id, get_imap_server
# 邮件域名常量
DOMAIN_YAHOO = "yahoo.com"
DOMAIN_SINA = "sina.com"
DOMAIN_HOTMAIL = "hotmail.com"
DOMAIN_TIM_IT = "tim.it"
DOMAIN_163 = "163.com"
DOMAIN_RAMBLER_RU = "rambler.ru"
DOMAIN_ALICE_IT = "alice.it"
DOMAIN_MARS_DTI_NE_JP = "mars.dti.ne.jp"
DOMAIN_BTVM_NE_JP = "btvm.ne.jp"
DOMAIN_AURORA_DTI_NE_JP = "aurora.dti.ne.jp"
DOMAIN_GMAIL = "gmail.com"
DOMAIN_GMX = "gmx.com"
DOMAIN_GMX_NET = "gmx.net"
DOMAIN_GMX_AT = "gmx.at"
DOMAIN_GMX_FR = "gmx.fr"
DOMAIN_GMX_US = "gmx.us"
DOMAIN_GMX_SG = "gmx.sg"
DOMAIN_GMX_CH = "gmx.ch"
DOMAIN_GMX_PT = "gmx.pt"
DOMAIN_ONET = "onet.pl"
DOMAIN_GAZETA_PL = "gazeta.pl"
DOMAIN_NAVER = "naver.com"
DOMAIN_INBOX_LV = "inbox.lv"
DOMAIN_GMX_DE = "gmx.de"
# 垃圾邮件域名
DOMAIN_PISS_MAIL = "pissmail.com"
DOMAIN_INCEL_EMAIL = "incel.email"
# ── Constantes de domaine (conservées pour la compatibilité des imports externes) ──
DOMAIN_YAHOO = "yahoo.com"
DOMAIN_SINA = "sina.com"
DOMAIN_HOTMAIL = "hotmail.com"
DOMAIN_TIM_IT = "tim.it"
DOMAIN_163 = "163.com"
DOMAIN_RAMBLER_RU = "rambler.ru"
DOMAIN_ALICE_IT = "alice.it"
DOMAIN_MARS_DTI_NE_JP = "mars.dti.ne.jp"
DOMAIN_BTVM_NE_JP = "btvm.ne.jp"
DOMAIN_AURORA_DTI_NE_JP = "aurora.dti.ne.jp"
DOMAIN_GMAIL = "gmail.com"
DOMAIN_GMX = "gmx.com"
DOMAIN_GMX_NET = "gmx.net"
DOMAIN_GMX_AT = "gmx.at"
DOMAIN_GMX_FR = "gmx.fr"
DOMAIN_GMX_US = "gmx.us"
DOMAIN_GMX_SG = "gmx.sg"
DOMAIN_GMX_CH = "gmx.ch"
DOMAIN_GMX_PT = "gmx.pt"
DOMAIN_ONET = "onet.pl"
DOMAIN_GAZETA_PL = "gazeta.pl"
DOMAIN_NAVER = "naver.com"
DOMAIN_INBOX_LV = "inbox.lv"
DOMAIN_GMX_DE = "gmx.de"
DOMAIN_PISS_MAIL = "pissmail.com"
DOMAIN_INCEL_EMAIL = "incel.email"
DOMAIN_SHITPOSTING_EXPERT = "shitposting.expert"
DOMAIN_HATESJE_WS = "hatesje.ws"
DOMAIN_CHILD_PIZZA = "child.pizza"
DOMAIN_GENOCIDE_FUN = "genocide.fun"
DOMAIN_DMC_CHAT = "dmc.chat"
DOMAIN_WEB_DE = "web.de"
DOMAIN_OUTLOOK_COM = "outlook.com"
DOMAIN_FIREMAIL_DE = "firemail.de"
DOMAIN_HATESJE_WS = "hatesje.ws"
DOMAIN_CHILD_PIZZA = "child.pizza"
DOMAIN_GENOCIDE_FUN = "genocide.fun"
DOMAIN_DMC_CHAT = "dmc.chat"
DOMAIN_WEB_DE = "web.de"
DOMAIN_OUTLOOK_COM = "outlook.com"
DOMAIN_FIREMAIL_DE = "firemail.de"
# IMAP服务器地址常量
AOL_IMAP_SERVER = "imap.aol.com"
IMAP_SERVER_163 = "imap.163.com"
IMAP_SERVER_SINA = "imap.sina.com"
YAHOO_IMAP_SERVER = "imap.mail.yahoo.com"
HOTMAIL_IMAP_SERVER = "outlook.office365.com"
# ── Note : les constantes IMAP_SERVER_* ont été supprimées. ───────────────────
# Utiliser imap_proxy_reader.IMAP_SERVER_MAP ou imap_proxy_reader.get_imap_server(login)
# pour obtenir le serveur IMAP correspondant à un domaine.
RAMBLER_IMAP_SERVER = "imap.rambler.ru"
ALICE_IMAP_SERVER = "in.alice.it"
TIME_IT_SERVER = "imap.tim.it"
MARS_DTI_NE_JP_SERVER = "imap.cm.dream.jp"
NAVER_SERVER = "imap.naver.com"
BTVM_NE_JP_SERVER = "imap.btvm.ne.jp"
GMAIL_IMAP_SERVER = "imap.gmail.com"
ONET_IMAP_SERVER = "imap.poczta.onet.pl"
GMX_IMAP_SERVER = "imap.gmx.com"
GMX_NET_IMAP_SERVER = "imap.gmx.net"
GMX_AT_IMAP_SERVER = "imap.gmx.at"
FIREMAIL_DE_IMAP_SERVER = "imap.firemail.de"
PISS_MAIL_IMAP_SERVER = "mail.pissmail.com"
INBOX_LV_IMAP_SERVER = "mail.inbox.lv"
WEB_DE_IMAP_SERVER = "imap.web.de"
GAZETA_PL_IMAP_SERVER = "imap.gazeta.pl"
# ── IMAPClient avec IMAP ID spoofing ─────────────────────────────────────────
class FingerprintIMAPClient(IMAPClient):
    """IMAPClient that announces a mail-client identity after every login.

    Each successful ``login()`` is followed by an IMAP ID command
    (RFC 2971) sent through ``send_imap_id``.
    """

    def login(self, username: str, password: str):
        """Log in, send IMAP ID, and return the parent's login response."""
        login_response = super().login(username, password)
        send_imap_id(self)
        return login_response
# ── Fabrique d'instances IMAPClient ──────────────────────────────────────────
def show_folders(imap) -> list:
    """Return the mailbox names available on ``imap``.

    Args:
        imap: Either an ``IMAPClient`` (``list_folders()`` is used and the
            last element of each entry is taken as the name) or an
            imaplib-style connection (the raw ``LIST`` response is parsed).

    Returns:
        A list of folder names.  For imaplib-style connections the names
        are returned exactly as the server sent them (quotes included) and
        ``["INBOX"]`` is returned when nothing could be parsed.
    """
    if isinstance(imap, IMAPClient):
        return [info[-1] for info in imap.list_folders()]

    folders = []
    for raw_line in imap.list()[1]:
        # imaplib yields [None] when the server sent no LIST data, and may
        # yield tuples for literal-encoded names; neither can be decoded.
        if not isinstance(raw_line, (bytes, bytearray)):
            continue
        name = _mailbox_from_list_line(raw_line.decode())
        if name:
            folders.append(name)
    if not folders:
        folders.append("INBOX")
    return folders


def _mailbox_from_list_line(line: str):
    """Extract the mailbox token from one ``(flags) delimiter name`` line."""
    _flags, closed, remainder = line.partition(") ")
    if not closed:
        return None
    if remainder.startswith('"'):
        # Quoted hierarchy delimiter such as "/" or ".": the name starts
        # after the closing quote and the following space.
        end = remainder.find('" ', 1)
        if end == -1:
            return None
        return remainder[end + 2:] or None
    # Unquoted delimiter token (NIL for flat namespaces).
    _delimiter, spaced, name = remainder.partition(" ")
    if not spaced:
        return None
    return name or None
def create_imap(login: str) -> FingerprintIMAPClient:
    """Create a ``FingerprintIMAPClient`` for the IMAP server of ``login``.

    The domain → server resolution is delegated to ``get_imap_server()``
    (imported from ``imap_proxy_reader``) so that the mapping lives in a
    single place instead of a per-domain ``if``/``elif`` chain here.

    Args:
        login: Email address of the account to open.

    Returns:
        A ``FingerprintIMAPClient`` constructed for the resolved server
        with ``use_uid=True``.
    """
    server = get_imap_server(login)
    return FingerprintIMAPClient(server, use_uid=True)
+230 -157
View File
@@ -2,39 +2,59 @@ import datetime
import email
import logging
import os
import random
import re
from concurrent.futures import ThreadPoolExecutor
import time
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from email.header import decode_header
from email.message import Message
from typing import Union, List, Optional
from typing import Union, List, Optional, Dict
from dotenv import load_dotenv
from imapclient import IMAPClient
from db.mongo_manager import MONGO_STORE_MANAGER
from excel_reader import read_contacts
from mail.mail_constants import DOMAIN_HOTMAIL, create_imap
from mail.imap_proxy_reader import ProxyIMAPClient, ProxyConfig, get_imap_server
from mail.mail_constants import DOMAIN_HOTMAIL, create_imap, show_folders
from mail.imap_proxy_reader import (
ProxyIMAPClient, ProxyConfig, get_imap_server,
extract_body,
VALIDATION_URL_SUBJECT_FR, VALIDATION_URL_SUBJECT_EN,
VALIDATION_URL_REGEX, DATE_FORMAT,
)
from imapclient import IMAPClient
from models.ReserveResultPojo import ReserveResultPojo
from models.mail_pojo import MailPojo, MailAddress
# Charger les variables d'environnement depuis .env
load_dotenv()
# 定义常量
VALIDATION_URL_SUBJECT_FR = 'Validation de votre demande de rendez-vous'
VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request'
VALIDATION_URL_REGEX = r"https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+"
# ── Constantes locales ────────────────────────────────────────────────────────
# VALIDATION_URL_SUBJECT_FR, VALIDATION_URL_SUBJECT_EN, VALIDATION_URL_REGEX,
# DATE_FORMAT sont importés depuis imap_proxy_reader (source de vérité unique).
PART_VALIDATION_URL_REGEX = r"client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+"
HERMES_EMAIL = "no-reply@hermes.com"
EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
# 日期格式
DATE_FORMAT = "%d-%b-%Y"
# Timeouts GMX (en secondes)
IMAP_SOCKET_TIMEOUT = 300 # timeout socket pour chaque opération IMAP
FUTURE_TIMEOUT = 600 # durée max allouée à la lecture d'une boîte mail
# Maximum number of concurrent worker threads per domain group (keeps a
# single mail provider from triggering its risk controls).  Keys are matched
# as substrings of the address by get_domain_group(); "default" is the
# fallback group.
MAX_WORKERS_PER_DOMAIN: Dict[str, int] = {
"gmx": 80,
"aol": 5,
"gmail": 3,
"yahoo": 5,
"outlook": 5,
"hotmail": 5,
"firemail": 5,
"inbox.lv": 5,
"default": 5,
}
# Minimum interval (in minutes) between two reads of the same mailbox, to
# avoid frequent repeated logins.
MAIL_READ_MIN_INTERVAL_MINUTES = 15
# GMX域名列表(用于判断是否需要使用代理)
GMX_DOMAINS = (
"gmx.com", "gmx.net", "gmx.de", "gmx.at",
@@ -42,8 +62,9 @@ GMX_DOMAINS = (
)
# 需要通过代理读取的域名列表
# PROXY_DOMAINS = GMX_DOMAINS + ("inbox.lv",)
PROXY_DOMAINS = GMX_DOMAINS
PROXY_DOMAINS = GMX_DOMAINS + ("yahoo.com",)
# PROXY_DOMAINS = GMX_DOMAINS + ("yahoo.com",)
# PROXY_DOMAINS = GMX_DOMAINS
def is_gmx_account(login: str) -> bool:
@@ -55,6 +76,18 @@ def is_proxy_account(login: str) -> bool:
"""判断邮箱是否需要通过代理读取(GMX 或 inbox.lv"""
return any(d in login.lower() for d in PROXY_DOMAINS)
def get_domain_group(login: str) -> str:
    """Map an email address to its throttling group key.

    The group is the first key of ``MAX_WORKERS_PER_DOMAIN`` (other than
    ``"default"``) that occurs in the address's domain part, e.g.
    ``"user@gmx.net"`` → ``"gmx"`` and ``"user@aol.com"`` → ``"aol"``.
    ``"default"`` is returned when no key matches.

    Only the text after the last ``@`` is examined so that a local part
    such as ``aol.fan@gmail.com`` cannot select the wrong group.  A string
    without ``@`` is matched as a whole.
    """
    login_lower = login.lower()
    _local, at_sign, domain = login_lower.rpartition("@")
    haystack = domain if at_sign else login_lower
    for key in MAX_WORKERS_PER_DOMAIN:
        if key != "default" and key in haystack:
            return key
    return "default"
# 邮箱列表(简化为常量)
REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com"
@@ -95,39 +128,29 @@ class MailReader:
"""邮件读取器类"""
def __init__(self, login: str, password: str, proxy: Optional[ProxyConfig] = None,
failed_gmx_list: Optional[List[str]] = None):
failed_gmx_list: Optional[List[str]] = None,
delay_range: tuple = (1.0, 5.0)):
self.login = login
self.password = password
self.proxy = proxy
self.failed_gmx_list = failed_gmx_list if failed_gmx_list is not None else []
@staticmethod
def show_folders(imap) -> List[str]:
"""获取邮箱文件夹列表"""
folders = []
is_imap_client = isinstance(imap, IMAPClient)
if not is_imap_client:
# 处理非IMAPClient对象
for i in imap.list()[1]:
l = i.decode().split(' "/" ')
folders.append(l[1])
else:
# 处理IMAPClient对象
folder_list = imap.list_folders()
for i in folder_list:
name = i[-1]
folders.append(name)
return folders
self.delay_range = delay_range # (min_seconds, max_seconds) 随机延迟范围
def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]:
"""读取邮件"""
"""读取邮件(含随机延迟和读取时间记录)"""
# 随机延迟,模拟人工节奏,降低被识别为机器人的概率
_delay = random.uniform(*self.delay_range)
time.sleep(_delay)
# ── GMX / inbox.lv 账户 → 使用代理连接(失败自动重试最多3次)──
if is_proxy_account(self.login) and self.proxy is not None:
return self._read_emails_with_proxy_retry(mails_messages)
result = self._read_emails_with_proxy_retry(mails_messages)
else:
return self._read_emails_internal(create_imap(self.login), mails_messages)
result = self._read_emails_internal(create_imap(self.login), mails_messages)
# 记录本次读取时间,供下次调用的 need_to_check_email 判断间隔
MONGO_STORE_MANAGER.update_mail_read_time(self.login)
return result
def _read_emails_with_proxy_retry(
self,
@@ -175,8 +198,8 @@ class MailReader:
mail_list = []
print("read mails from {}".format(self.login))
# 获取文件夹列表
folder_list = self.show_folders(imap)
# 获取文件夹列表(委托给 mail_constants.show_folders
folder_list = show_folders(imap)
# 处理每个文件夹
for folder in folder_list:
@@ -204,88 +227,72 @@ class MailReader:
return mail_list
def _get_messages_from_folder(self, imap, subject: str, folder: str = "INBOX") -> List[MailPojo]:
"""从指定文件夹获取邮件(传统IMAP方式)"""
"""从指定文件夹获取邮件(传统IMAP方式,批量fetch减少往返次数"""
imap.select(folder)
mail_messages = []
# 搜索邮件
# 搜索符合条件的所有邮件ID
search_query = '(SUBJECT "{}" SINCE "{}")'.format(subject, datetime.datetime.today().strftime(DATE_FORMAT))
typ, data = imap.search(None, search_query)
for i in data[0].split():
ids = data[0].split()
if not ids:
return mail_messages
# 批量fetch:一次请求取回所有匹配邮件,减少 N 次往返为 1 次
id_list = b",".join(ids)
try:
res, msg_list = imap.fetch(id_list, "(RFC822)")
except Exception as error:
print("Batch fetch error in folder {}: {}".format(folder, error))
return mail_messages
for response in msg_list:
if not isinstance(response, tuple):
continue
try:
# 获取邮件内容
res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)")
email_message = email.message_from_bytes(response[1])
# 解析邮件
for response in msg:
if isinstance(response, tuple):
email_message = email.message_from_bytes(response[1])
# 解码主题
subject_decoded, subject_encoded = decode_header(email_message["Subject"])[0]
if isinstance(subject_decoded, bytes):
subject_decoded = subject_decoded.decode(subject_encoded)
# 解码主题
subject, subject_encoded = decode_header(email_message["Subject"])[0]
if isinstance(subject, bytes):
subject = subject.decode(subject_encoded)
# 解码发件人地址
from_address = find_from_mail(decode_header(email_message.get("From")))
# 解码件人地址
from_address = find_from_mail(decode_header(email_message.get("From")))
# 解码件人地址
to_email = find_from_mail(decode_header(email_message.get("To")))
# 解码收件人地址
to_email = find_from_mail(decode_header(email_message.get("To")))
print("Email:", self.login)
print("From:", from_address)
print("To:", to_email)
print("Subject:", subject_decoded)
print("Email:", self.login)
print("From:", from_address)
print("To:", to_email)
print("Subject:", subject)
# 获取邮件正文(委托给 imap_proxy_reader.extract_body
body = extract_body(email_message)
# 获取邮件正文
body = self._extract_body(email_message)
# 检查是否是预约验证邮件
if VALIDATION_URL_SUBJECT_FR in subject_decoded or VALIDATION_URL_SUBJECT_EN in subject_decoded:
mail = MailPojo(
subject=subject_decoded,
body=body,
from_address=from_address
)
# 检查是否是预约验证邮件
if VALIDATION_URL_SUBJECT_FR in subject or VALIDATION_URL_SUBJECT_EN in subject:
mail = MailPojo(
subject=subject,
body=body,
from_address=from_address
)
# 设置收件人地址
if to_email is None:
mail.to_address = self.login
else:
mail.to_address = to_email
# 设置收件人地址
if to_email is None:
mail.to_address = self.login
else:
mail.to_address = to_email
mail.mail_address = self.login
mail_messages.append(mail)
mail.mail_address = self.login
mail_messages.append(mail)
except Exception as error:
print("Error processing email: {}".format(error))
return mail_messages
def _extract_body(self, email_message: Message) -> str:
"""提取邮件正文"""
body = ""
# 遍历邮件部分
for part in email_message.walk():
try:
content_type = part.get_content_type()
if content_type == "text/html":
# 处理HTML内容
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
elif content_type == "text/plain":
# 处理纯文本内容
payload = part.get_payload()
if payload:
body += payload
except Exception as error:
print("Error extracting body part: {}".format(error))
return body
def _get_messages_from_folder_for_imapclient(self, imap, folder: str = "INBOX") -> List[MailPojo]:
"""从指定文件夹获取邮件(IMAPClient方式)"""
mail_messages = []
@@ -308,8 +315,8 @@ class MailReader:
email_message = email.message_from_bytes(message_data[b'RFC822'])
# 获取发件人和主题
from_address = email_message.get('FROM')
subject = email_message.get('subject')
from_address = email_message.get('FROM') or ""
subject = email_message.get('subject') or ""
# 检查是否是Hermes邮件
hermes_mail_address = "no-reply@hermes.com"
@@ -317,8 +324,8 @@ class MailReader:
"outlook.com" in from_address or
"hotmail" in from_address):
# 提取邮件正文
body = self._extract_body_for_imapclient(email_message)
# 提取邮件正文(委托给 imap_proxy_reader.extract_body
body = extract_body(email_message)
# 检查是否是预约验证邮件
if (VALIDATION_URL_SUBJECT_FR in subject or
@@ -351,24 +358,6 @@ class MailReader:
return mail_messages
def _extract_body_for_imapclient(self, email_message: Message) -> str:
"""提取IMAPClient邮件正文"""
body = ""
for part in email_message.walk():
content_type = part.get_content_type()
if content_type == "text/html":
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
elif content_type == "text/plain":
payload = part.get_payload()
if payload:
body += payload
return body
# 邮件处理相关函数
def find_item_by_url(url: str, successful_items) -> Union[None, ReserveResultPojo]:
@@ -405,57 +394,120 @@ def need_to_valid_url(url: str, item: Union[ReserveResultPojo, None]) -> bool:
def need_to_check_email(mail: str, successful_items) -> bool:
    """Decide whether the mailbox ``mail`` should be read now.

    The mailbox is skipped (``False``) in two cases:

    1. ``successful_items`` already contains an entry for this address
       whose ``url_validated`` is ``True``.
    2. The last recorded read (from ``MONGO_STORE_MANAGER``) happened less
       than ``MAIL_READ_MIN_INTERVAL_MINUTES`` minutes ago, to avoid
       logging in to the same account too frequently.

    Args:
        mail: Email address of the mailbox.
        successful_items: Sized iterable of objects exposing ``email`` and
            ``url_validated`` attributes.

    Returns:
        ``True`` when the mailbox should be read, ``False`` otherwise.
    """
    print("successful_items size is " + str(len(successful_items)))

    # Case 1: a validated reservation already exists for this address.
    already_validated = any(
        item.email == mail and item.url_validated is True
        for item in successful_items
    )
    if already_validated:
        return False

    # Case 2: throttle on the time elapsed since the previous read.
    last_read = MONGO_STORE_MANAGER.get_last_mail_read_time(mail)
    if last_read is None:
        return True

    utc = datetime.timezone.utc
    if last_read.tzinfo is None:
        # Stored timestamps are written as naive UTC; make them aware so the
        # subtraction works whether or not the driver returns aware values.
        last_read = last_read.replace(tzinfo=utc)
    elapsed_minutes = (datetime.datetime.now(utc) - last_read).total_seconds() / 60
    if elapsed_minutes < MAIL_READ_MIN_INTERVAL_MINUTES:
        print("[跳过] {} 距上次读取仅 {:.1f} 分钟,未达到最小间隔 {} 分钟".format(
            mail, elapsed_minutes, MAIL_READ_MIN_INTERVAL_MINUTES))
        return False
    return True
def find_links_to_validate_from_mail_list(
mail_list: List[MailAddress],
logger,
proxy: Optional[ProxyConfig] = None,
proxy_pool: Optional[List[ProxyConfig]] = None,
) -> List[str]:
"""从邮件列表中查找需要验证的链接,返回读取失败的GMX账户列表"""
"""
从邮件列表中查找需要验证的链接,返回读取失败的GMX账户列表。
参数
----
proxy : 单一代理(GMX专用,兼容旧调用方式)
proxy_pool : 代理列表(非GMX账号也会轮换使用;若为空则非GMX走直连)
"""
if not mail_list:
return []
# 检查时间前开始检查邮件
contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list()
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
mails_messages = []
failed_gmx: List[str] = []
# 使用线程池处理邮件
with ThreadPoolExecutor(max_workers=100) as executor:
futures = []
# ── 按域名分组,每组使用独立线程池限流 ────────────────────────────
# domain_group → [(MailAddress, ProxyConfig|None), ...]
grouped: Dict[str, List[tuple]] = defaultdict(list)
for mail in mail_list:
# 检查是否需要读取邮件
if need_to_check_email(mail.mail, successful_items):
mail_reader = MailReader(mail.mail, mail.password, proxy=proxy,
failed_gmx_list=failed_gmx)
future = executor.submit(mail_reader.read_emails, mails_messages)
futures.append(future)
for idx, mail in enumerate(mail_list):
if not need_to_check_email(mail.mail, successful_items):
continue
# 等待所有任务完成
for future in futures:
try:
future.result(timeout=FUTURE_TIMEOUT)
except TimeoutError:
print("⏱️ Timeout ({} s) dépassé pour une boîte mail — lecture ignorée.".format(FUTURE_TIMEOUT))
except Exception as e:
print("Error processing mail: {},login: {}, password: {}".format(e,mail.mail, mail.password))
# 为账号分配代理
if is_proxy_account(mail.mail):
# GMX / inbox.lv → 使用专用 GMX 代理
assigned_proxy = proxy
elif proxy_pool:
# 非GMX + 有代理池 → 按索引轮换分配
assigned_proxy = proxy_pool[idx % len(proxy_pool)]
else:
# 无代理池 → 直连
assigned_proxy = None
# ── Résumé des comptes proxy en échec ──────────────────────
group_key = get_domain_group(mail.mail)
grouped[group_key].append((mail, assigned_proxy))
# ── 每个域名分组启动独立线程池 ────────────────────────────────────
# future → mail address,用于进度显示
future_to_mail: Dict[object, str] = {}
executors = []
for group_key, items in grouped.items():
max_w = MAX_WORKERS_PER_DOMAIN.get(group_key, MAX_WORKERS_PER_DOMAIN["default"])
executor = ThreadPoolExecutor(max_workers=max_w)
executors.append(executor)
print("[限流] 域名组 '{}': {} 账号,max_workers={}".format(
group_key, len(items), max_w))
for mail, assigned_proxy in items:
mail_reader = MailReader(
mail.mail,
mail.password,
proxy=assigned_proxy,
failed_gmx_list=failed_gmx,
)
future = executor.submit(mail_reader.read_emails, mails_messages)
future_to_mail[future] = mail.mail
# ── 等待所有任务完成,然后关闭线程池 ─────────────────────────────
total = len(future_to_mail)
completed = 0
for future in as_completed(future_to_mail):
mail_addr = future_to_mail[future]
completed += 1
try:
future.result(timeout=FUTURE_TIMEOUT)
print("[进度] {}/{} {}".format(completed, total, mail_addr))
except TimeoutError:
print("[进度] {}/{} {} — Timeout ({} s), lecture ignorée.".format(
completed, total, mail_addr, FUTURE_TIMEOUT))
except Exception as e:
print("[进度] {}/{} {} — Erreur: {}".format(completed, total, mail_addr, e))
for executor in executors:
executor.shutdown(wait=False)
# ── 输出代理账号读取摘要 ──────────────────────────────────────────
if failed_gmx:
print("\n[Proxy] ⚠️ {} compte(s) non lus (GMX / inbox.lv) :".format(len(failed_gmx)))
for addr in failed_gmx:
@@ -463,10 +515,9 @@ def find_links_to_validate_from_mail_list(
else:
print("\n[Proxy] ✅ Tous les comptes GMX / inbox.lv ont été lus avec succès.")
# 刷新成功的项目
# ── 处理邮件中的验证链接 ──────────────────────────────────────────
_refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
# 处理邮件中的链接
for mail in mails_messages:
match = re.search(VALIDATION_URL_REGEX, mail.body)
if match:
@@ -485,7 +536,7 @@ def find_links_to_validate_from_mail_list(
url,
mail.to_address,
model=_model,
_all_contact_list=contact_to_book_list, _used_ip= _used_ip)
_all_contact_list=contact_to_book_list, _used_ip=_used_ip)
else:
logger.info("do not need to click url --> {}".format(mail.mail_address))
@@ -500,7 +551,8 @@ if __name__ == '__main__':
# file_name="~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx")
# file_name="~/Desktop/contact_list_yahoo_100_20_04.xlsx")
# file_name="~/Desktop/contact_yahoo_5.xlsx")
file_name="~/Desktop/contact_list_2026-04-22.xlsx")
# file_name="~/Desktop/contact_list_2026-04-24_yahoo_50.xlsx")
file_name="~/Desktop/contact_list_2026-04-23.xlsx")
# file_name="~/Desktop/contact_list_2026-04-11.xlsx")
# file_name="~/Desktop/contact_list_2026-04-17.xlsx")
# file_name="~/Desktop/contact_list_inbox_100_14_04.xlsx")
@@ -532,7 +584,7 @@ if __name__ == '__main__':
_to_add = False
if _to_add:
filter_mail.append(mail_pojo)
# filter_mail = [MailAddress("minnakan@firemail.de", "Yjn8nQ0sZ")]
# filter_mail = [MailAddress("pishikmamn@gmx.de", "53OBns2jAXE")]
# ── Mode de lecture : GMX_ONLY=true → uniquement les comptes GMX ──
gmx_only = os.environ.get("GMX_ONLY", "false").strip().lower() == "true"
@@ -550,8 +602,29 @@ if __name__ == '__main__':
username=os.environ.get("GMX_PROXY_USERNAME"),
password=os.environ.get("GMX_PROXY_PASSWORD"),
)
# 非GMX账号代理池(可配置多个,轮换使用;留空则直连)
# 格式:PROXY_POOL_HOSTS="host1:port1,host2:port2",与 GMX_PROXY 同类型
_proxy_pool_raw = os.environ.get("PROXY_POOL_HOSTS", "").strip()
non_gmx_proxy_pool: Optional[List[ProxyConfig]] = None
if _proxy_pool_raw:
non_gmx_proxy_pool = []
for entry in _proxy_pool_raw.split(","):
entry = entry.strip()
if ":" in entry:
_h, _p = entry.rsplit(":", 1)
non_gmx_proxy_pool.append(ProxyConfig(
host=_h,
port=int(_p),
proxy_type=os.environ.get("GMX_PROXY_TYPE", "SOCKS5"),
username=os.environ.get("GMX_PROXY_USERNAME"),
password=os.environ.get("GMX_PROXY_PASSWORD"),
))
# 处理邮件
failed = find_links_to_validate_from_mail_list(filter_mail, logger, proxy=gmx_proxy)
failed = find_links_to_validate_from_mail_list(
filter_mail, logger, proxy=gmx_proxy, proxy_pool=non_gmx_proxy_pool
)
# ── Afficher les comptes GMX non lus ─────────────────────
if failed:
+2 -1
View File
@@ -104,7 +104,8 @@ if __name__ == '__main__':
# file_list = ['~/Desktop/contact_list_inbox_lv_100.xlsx']
# file_list = ['~/Desktop/contact_list_yahoo_100_20_04.xlsx']
# file_list = ['~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx']
file_list = ['~/Desktop/contact_list_2026-04-21.xlsx']
file_list = ['~/Desktop/contact_list_2026-04-23.xlsx']
# file_list = ['~/Desktop/contact_list_2026-04-24_yahoo_50.xlsx']
# file_list = ['~/Desktop/reste_inbox_lv.xlsx']
# file_list = ['~/Desktop/contact_list_2024-09-02_firemail_de_100.xlsx']
# file_list = ['~/Desktop/contact_list_inbox_100_14_04.xlsx']