Compare commits

...

24 Commits

Author SHA1 Message Date
panleicim 6b05eb38d7 exclude proxy for inbox.lv 2026-04-23 22:20:22 +02:00
panleicim 7020866e92 fix for migration cached data 2026-04-17 00:30:00 +02:00
panleicim a2bb4caa74 exclude proxy for inbox.lv 2026-04-16 11:17:39 +02:00
panleicim d45d6f4d7d proxy for inbox.lv 2026-04-09 08:02:23 +02:00
panleicim c84266f8fd add timeout for reading mails 2026-04-04 11:28:49 +02:00
panleicim 1fa29ebd37 add deps to requirements.txt 2026-04-02 23:32:57 +02:00
panleicim ea8673540c print failed gmx mails 2026-03-28 12:13:06 +01:00
panleicim 228c0b9bbb use proxy while reading gmx mails 2026-03-28 11:01:08 +01:00
Lei PAN b224c75ad0 password in env 2026-03-06 19:55:25 +01:00
panleicim 945388bdb5 Merge branch 'master' of gitlab.lpaconsulting.fr:panleicim/appointment_request 2025-12-27 10:16:28 +01:00
Lei PAN ae7c6c13fb Merge branch 'master' of gitlab.lpaconsulting.fr:panleicim/appointment_request 2025-12-21 23:11:11 +01:00
Lei PAN 4e174a02aa Merge branch 'master' of git.lpaconsulting.fr:panleicim/appointment_request 2025-12-21 23:10:06 +01:00
Lei PAN 0719dbdf89 migrate 2 queues to bak queue 2025-12-21 23:09:46 +01:00
panleicim c487defeed Merge branch 'master' of gitlab.lpaconsulting.fr:panleicim/appointment_request 2025-12-21 11:11:43 +01:00
panleicim 717896d92e use contact_list_2025-11-28 contact list 2025-12-21 11:10:24 +01:00
panleicim 6bc925a06b migrate moring_data_cache_2 too 2025-12-20 20:39:34 +01:00
panleicim d7e96b0b08 Merge branch 'feature/5_1_12' 2025-12-20 11:20:56 +01:00
panleicim 2073dd2377 use mongo2 db 2025-12-17 11:18:46 +01:00
panleicim 6438e4dbd3 added script to backup db 2025-12-16 09:56:04 +01:00
panleicim c7a619503b define PROXY_TIMEOUT_S 2025-12-13 10:29:23 +01:00
panleicim f591ff14ac use 5.1.12 tags.js 2025-12-12 14:50:23 +01:00
panleicim 0a7657f716 Merge branch 'refs/heads/feature/get_ip_geo_info' 2025-12-10 23:36:55 +01:00
panleicim 9085a3c1c4 use 100 threads to read mails 2025-12-09 10:30:43 +01:00
panleicim 605bc8b252 use requests instead of curl_cffi 2025-12-01 15:52:03 +01:00
13 changed files with 976 additions and 84 deletions
+94 -33
View File
@@ -1,6 +1,7 @@
import datetime import datetime
import logging import logging
import time import time
import os
from pymongo import MongoClient from pymongo import MongoClient
@@ -11,7 +12,7 @@ from models.contact_pojo import ContactPojo
from models.mail_pojo import MailAddress from models.mail_pojo import MailAddress
from models.regisered_user_pojo import RegisteredUserPojo from models.regisered_user_pojo import RegisteredUserPojo
MONGO_DB_URL = "mongodb://mongo.lpaconsulting.fr/?timeoutMS=100000" MONGO_DB_URL = "mongodb://mongo2.lpaconsulting.fr/?timeoutMS=100000"
CAPTCHA_ERROR_COLLECTION_PREFIX = "CAPTCHA_ERROR_" CAPTCHA_ERROR_COLLECTION_PREFIX = "CAPTCHA_ERROR_"
BLACK_LIST = "BLACK_LIST" BLACK_LIST = "BLACK_LIST"
ACCEPTED_APPOINTMENT_LIST = "ACCEPTED_APPOINTMENT_LIST" ACCEPTED_APPOINTMENT_LIST = "ACCEPTED_APPOINTMENT_LIST"
@@ -25,7 +26,22 @@ CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP"
class MongoDbManager: class MongoDbManager:
def __init__(self): def __init__(self):
client = MongoClient(MONGO_DB_URL, username='appointment', password='Rdv@2022', authSource='appointment') # Get username and password from environment variables
mongo_username = os.getenv("MONGO_USERNAME")
mongo_password = os.getenv("MONGO_PASSWORD")
# Validate that environment variables exist
if not mongo_username or not mongo_password:
raise ValueError(
"MONGO_USERNAME and MONGO_PASSWORD environment variables must be set"
)
client = MongoClient(
MONGO_DB_URL,
username=mongo_username,
password=mongo_password,
authSource="appointment",
)
self.db = client.appointment self.db = client.appointment
self.logger = logging.getLogger("mongoDb") self.logger = logging.getLogger("mongoDb")
@@ -36,8 +52,13 @@ class MongoDbManager:
def insert_reserve_result(self, collection_name, reserve: ReserveResultPojo): def insert_reserve_result(self, collection_name, reserve: ReserveResultPojo):
try: try:
collection_to_use = self.db[collection_name] collection_to_use = self.db[collection_name]
collection_to_use.replace_one(filter={'_id': reserve.id, }, replacement=reserve.to_firestore_dict(), collection_to_use.replace_one(
upsert=True) filter={
"_id": reserve.id,
},
replacement=reserve.to_firestore_dict(),
upsert=True,
)
except Exception as Error: except Exception as Error:
self.logger.info(Error) self.logger.info(Error)
@@ -83,8 +104,14 @@ class MongoDbManager:
result_list.append(ContactPojo.from_firestore_dict(document)) result_list.append(ContactPojo.from_firestore_dict(document))
return result_list return result_list
def save_links_to_validate(self, link: str, mail_address: str, model: str, def save_links_to_validate(
_all_contact_list: list, _used_ip: str = ""): self,
link: str,
mail_address: str,
model: str,
_all_contact_list: list,
_used_ip: str = "",
):
collection_to_use = self.db[LINKS_TO_VALIDATE] collection_to_use = self.db[LINKS_TO_VALIDATE]
updated_at = time.strftime("%H:%M:%S", time.localtime()) updated_at = time.strftime("%H:%M:%S", time.localtime())
_ip_country = "FR" _ip_country = "FR"
@@ -95,32 +122,42 @@ class MongoDbManager:
_ip_country = _contact.ip_country _ip_country = _contact.ip_country
if len(mail_address) > 0: if len(mail_address) > 0:
collection_to_use.replace_one(filter={'_id': mail_address, }, replacement={ collection_to_use.replace_one(
u'url': link, filter={
u'email': mail_address, "_id": mail_address,
u'serial': serial,
u'model': model,
u'ip_country': _ip_country,
u'_used_ip': _used_ip,
"updated_at": updated_at
}, },
upsert=True) replacement={
"url": link,
"email": mail_address,
"serial": serial,
"model": model,
"ip_country": _ip_country,
"_used_ip": _used_ip,
"updated_at": updated_at,
},
upsert=True,
)
else: else:
collection_to_use.replace_one(filter={'_id': link, }, replacement={ collection_to_use.replace_one(
u'url': link, filter={
u'serial': serial, "_id": link,
u'model': model,
u'ip_country': _ip_country,
u'_used_ip': _used_ip,
"updated_at": updated_at
}, },
upsert=True) replacement={
"url": link,
"serial": serial,
"model": model,
"ip_country": _ip_country,
"_used_ip": _used_ip,
"updated_at": updated_at,
},
upsert=True,
)
def get_code_for_email(self, email: str): def get_code_for_email(self, email: str):
collection_name = DESTINATION_EMAIL_LIST collection_name = DESTINATION_EMAIL_LIST
try: try:
collection_to_use = self.db[collection_name] collection_to_use = self.db[collection_name]
mailDocument = collection_to_use.find_one(filter={'_id': email}) mailDocument = collection_to_use.find_one(filter={"_id": email})
if mailDocument is not None: if mailDocument is not None:
return MailAddress.from_firestore_dict(mailDocument).password return MailAddress.from_firestore_dict(mailDocument).password
else: else:
@@ -134,7 +171,9 @@ class MongoDbManager:
_cursor = self.db[_collection_name] _cursor = self.db[_collection_name]
registered_user_list = [] registered_user_list = []
for document in _cursor.find(): for document in _cursor.find():
registered_user_list.append(RegisteredUserPojo.from_firestore_dict(document)) registered_user_list.append(
RegisteredUserPojo.from_firestore_dict(document)
)
return registered_user_list return registered_user_list
def get_destination_emails(self) -> list: def get_destination_emails(self) -> list:
@@ -167,8 +206,18 @@ class MongoDbManager:
self.logger.info(error) self.logger.info(error)
return link_list return link_list
def link_validated_for_result(self, link: str, linkPojo: LinkPojo, state=True, is_duplicated=False, def link_validated_for_result(
is_invalid=False, segement_position=1, ua="", model="", timestamp_in_s: list = None): self,
link: str,
linkPojo: LinkPojo,
state=True,
is_duplicated=False,
is_invalid=False,
segement_position=1,
ua="",
model="",
timestamp_in_s: list = None,
):
if timestamp_in_s is None: if timestamp_in_s is None:
timestamp_in_s = [] timestamp_in_s = []
print("link_validated_for_result() called with url = " + link) print("link_validated_for_result() called with url = " + link)
@@ -181,7 +230,10 @@ class MongoDbManager:
print("link_validated_for_result() called with id = " + _id) print("link_validated_for_result() called with id = " + _id)
collection_name = str(datetime.date.today()) collection_name = str(datetime.date.today())
print("link_validated_for_result() called with collection_name = " + collection_name) print(
"link_validated_for_result() called with collection_name = "
+ collection_name
)
collection = self.db[collection_name] collection = self.db[collection_name]
validated_at = time.strftime("%H:%M:%S", time.localtime()) validated_at = time.strftime("%H:%M:%S", time.localtime())
@@ -190,18 +242,27 @@ class MongoDbManager:
validated_by = "Invalid" validated_by = "Invalid"
if is_duplicated: if is_duplicated:
validated_by = "Double" validated_by = "Double"
collection.find_one_and_update({'_id': _id}, { collection.find_one_and_update(
"$set": {"url_validated": state, "validated_at": validated_at, "id": _id, "email": linkPojo.email, {"_id": _id},
{
"$set": {
"url_validated": state,
"validated_at": validated_at,
"id": _id,
"email": linkPojo.email,
"url": link, "url": link,
"validated_by_model": model, "validated_by_model": model,
"serial": linkPojo.serial, "serial": linkPojo.serial,
"validated_by_ua": ua, "validated_by_ua": ua,
"timestamp_in_s": "-".join(str(x) for x in timestamp_in_s), "timestamp_in_s": "-".join(str(x) for x in timestamp_in_s),
"validated_by": validated_by}}, "validated_by": validated_by,
upsert=True) }
},
upsert=True,
)
# remove the link from db # remove the link from db
collection_to_use = self.db[LINKS_TO_VALIDATE] collection_to_use = self.db[LINKS_TO_VALIDATE]
collection_to_use.delete_one({'_id': linkPojo.email}) collection_to_use.delete_one({"_id": linkPojo.email})
MONGO_STORE_MANAGER = MongoDbManager() MONGO_STORE_MANAGER = MongoDbManager()
+539
View File
@@ -0,0 +1,539 @@
"""
imap_proxy_reader.py
====================
Lire des emails via IMAPClient en passant par un proxy SOCKS5/SOCKS4/HTTP.
Fonctionnement :
- ProxyIMAP4_TLS : sous-classe de imaplib.IMAP4 qui ouvre la socket
à travers un proxy SOCKS via PySocks.
- ProxyIMAPClient : sous-classe de IMAPClient qui injecte ProxyIMAP4_TLS
au lieu de la connexion directe habituelle.
Dépendances :
pip install imapclient PySocks
"""
import datetime
import email
import imaplib
import io
import logging
import os
import re
import ssl
import socket
from dataclasses import dataclass, field
from email.message import Message
from typing import List, Optional, Tuple
import socks
from dotenv import load_dotenv
from imapclient import IMAPClient
load_dotenv()
# ──────────────────────────────────────────────────────────────
# Constantes
# ──────────────────────────────────────────────────────────────
VALIDATION_URL_SUBJECT_FR = "Validation de votre demande de rendez-vous"
VALIDATION_URL_SUBJECT_EN = "Please confirm your appointment request"
VALIDATION_URL_REGEX = (
r"https:\/\/rendezvousparis\.hermes\.com"
r"\/client\/register\/[A-Z0-9]+\/validate\.code=[A-Z0-9]+"
)
DATE_FORMAT = "%d-%b-%Y"
# Correspondance domaine → serveur IMAP (identique à mail_constants.py)
IMAP_SERVER_MAP: List[Tuple[str, str]] = [
("163.com", "imap.163.com"),
("yahoo.com", "imap.mail.yahoo.com"),
("firemail.de", "imap.firemail.de"),
("gmail.com", "imap.gmail.com"),
("sina.com", "imap.sina.com"),
("hotmail.com", "outlook.office365.com"),
("outlook.com", "outlook.office365.com"),
("rambler.ru", "imap.rambler.ru"),
("btvm.ne.jp", "imap.btvm.ne.jp"),
("mars.dti.ne.jp", "imap.cm.dream.jp"),
("aurora.dti.ne.jp", "imap.cm.dream.jp"),
("naver.com", "imap.naver.com"),
("onet.pl", "imap.poczta.onet.pl"),
("gazeta.pl", "imap.gazeta.pl"),
("tim.it", "imap.tim.it"),
("alice.it", "in.alice.it"),
("gmx.com", "imap.gmx.com"),
("gmx.fr", "imap.gmx.com"),
("gmx.us", "imap.gmx.com"),
("gmx.ch", "imap.gmx.com"),
("gmx.pt", "imap.gmx.com"),
("gmx.sg", "imap.gmx.com"),
("gmx.net", "imap.gmx.net"),
("gmx.de", "imap.gmx.net"),
("gmx.at", "imap.gmx.at"),
("web.de", "imap.web.de"),
("inbox.lv", "mail.inbox.lv"),
("pissmail.com", "mail.pissmail.com"),
("incel.email", "mail.pissmail.com"),
("shitposting.expert","mail.pissmail.com"),
("hatesje.ws", "mail.pissmail.com"),
("child.pizza", "mail.pissmail.com"),
("genocide.fun", "mail.pissmail.com"),
("dmc.chat", "mail.pissmail.com"),
("aol.com", "imap.aol.com"), # fallback AOL
]
PROXY_TYPE_MAP = {
"SOCKS5": socks.SOCKS5,
"SOCKS4": socks.SOCKS4,
"HTTP": socks.HTTP,
}
logger = logging.getLogger(__name__)
# ──────────────────────────────────────────────────────────────
# Modèles de données
# ──────────────────────────────────────────────────────────────
@dataclass
class ProxyConfig:
"""Configuration du proxy."""
host: str
port: int
proxy_type: str = "SOCKS5" # "SOCKS5" | "SOCKS4" | "HTTP"
username: Optional[str] = None
password: Optional[str] = None
@property
def socks_type(self) -> int:
t = self.proxy_type.upper()
if t not in PROXY_TYPE_MAP:
raise ValueError(f"proxy_type invalide : {self.proxy_type!r}. "
f"Valeurs autorisées : {list(PROXY_TYPE_MAP)}")
return PROXY_TYPE_MAP[t]
def __repr__(self) -> str:
auth = f"{self.username}:***@" if self.username else ""
return f"{self.proxy_type}://{auth}{self.host}:{self.port}"
@dataclass
class MailAccount:
"""Compte email à lire."""
login: str
password: str
@dataclass
class MailResult:
"""Résultat d'une lecture d'email."""
account: str
subject: str
from_address: str
to_address: str
body: str
validation_urls: List[str] = field(default_factory=list)
# ──────────────────────────────────────────────────────────────
# Connexion IMAP via proxy (bas niveau)
# ──────────────────────────────────────────────────────────────
class ProxyIMAP4_TLS(imaplib.IMAP4):
"""
Variante TLS de imaplib.IMAP4 qui route la connexion
à travers un proxy SOCKS5/SOCKS4/HTTP grâce à PySocks.
"""
def __init__(
self,
host: str,
port: int,
ssl_context: Optional[ssl.SSLContext],
proxy: ProxyConfig,
timeout: Optional[float] = None,
):
self._ssl_context = ssl_context
self._proxy = proxy
self._timeout = timeout
# imaplib.IMAP4.__init__ appelle self.open()
imaplib.IMAP4.__init__(self, host, port)
self.file: io.BufferedReader
def open(self, host: str = "", port: int = 993, timeout: Optional[float] = None) -> None:
self.host = host
self.port = port
effective_timeout = timeout if timeout is not None else self._timeout
# ── Créer la socket SOCKS ────────────────────────────
sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM)
sock.set_proxy(
proxy_type=self._proxy.socks_type,
addr=self._proxy.host,
port=self._proxy.port,
username=self._proxy.username,
password=self._proxy.password,
)
if effective_timeout:
sock.settimeout(effective_timeout)
sock.connect((host, port))
# ── Envelopper avec SSL/TLS ──────────────────────────
ctx = self._ssl_context or ssl.create_default_context()
self.sock = ctx.wrap_socket(sock, server_hostname=host)
self.file = self.sock.makefile("rb")
# ── Méthodes requises par imaplib.IMAP4 ─────────────────
def read(self, size: int) -> bytes:
return self.file.read(size) # type: ignore[return-value]
def readline(self) -> bytes:
return self.file.readline() # type: ignore[return-value]
def send(self, data) -> None:
self.sock.sendall(data)
def shutdown(self) -> None:
imaplib.IMAP4.shutdown(self)
# ──────────────────────────────────────────────────────────────
# IMAPClient avec proxy
# ──────────────────────────────────────────────────────────────
class ProxyIMAPClient(IMAPClient):
"""
Sous-classe d'IMAPClient qui utilise un proxy SOCKS/HTTP.
Usage :
proxy = ProxyConfig(host="127.0.0.1", port=1080, proxy_type="SOCKS5")
client = ProxyIMAPClient("imap.gmail.com", proxy=proxy, use_uid=True)
client.login("user@gmail.com", "password")
"""
def __init__(self, host: str, proxy: ProxyConfig, **kwargs):
self._proxy = proxy
super().__init__(host, **kwargs)
def _create_IMAP4(self):
"""Remplace la méthode d'IMAPClient pour injecter ProxyIMAP4_TLS."""
if self.ssl:
# self._timeout peut être un float (secondes) ou un objet avec
# un attribut 'connect' (ex : urllib3 Timeout). On gère les deux.
_timeout = self._timeout
if _timeout is not None and not isinstance(_timeout, (int, float)):
_timeout = getattr(_timeout, "connect", None)
return ProxyIMAP4_TLS(
host=self.host,
port=self.port,
ssl_context=self.ssl_context,
proxy=self._proxy,
timeout=_timeout,
)
# Connexion non-SSL à travers le proxy (rare, mais supporté)
# On monkey-patch juste la connexion TCP
raise NotImplementedError(
"Connexion IMAP non-SSL via proxy non implémentée. "
"Utilisez ssl=True (port 993)."
)
# ──────────────────────────────────────────────────────────────
# Fonctions utilitaires
# ──────────────────────────────────────────────────────────────
def get_imap_server(login: str) -> str:
"""Retourne le serveur IMAP correspondant au domaine du login."""
login_lower = login.lower()
for domain, server in IMAP_SERVER_MAP:
if domain in login_lower:
return server
return "imap.aol.com" # fallback
def extract_body(email_message: Message) -> str:
"""Extrait le corps HTML ou texte d'un email."""
body = ""
for part in email_message.walk():
content_type = part.get_content_type()
try:
if content_type == "text/html":
payload = part.get_payload(decode=True)
if payload:
body += payload.decode("utf-8", errors="ignore")
elif content_type == "text/plain":
payload = part.get_payload()
if payload:
body += str(payload)
except Exception as exc:
logger.warning("Erreur extraction body : %s", exc)
return body
def find_validation_urls(text: str) -> List[str]:
"""Recherche toutes les URLs de validation Hermes dans un texte."""
return re.findall(VALIDATION_URL_REGEX, text)
# ──────────────────────────────────────────────────────────────
# Lecteur principal
# ──────────────────────────────────────────────────────────────
class ProxyMailReader:
"""
Lit les emails d'un compte via IMAPClient en passant par un proxy.
Paramètres
----------
account : MailAccount
Identifiants du compte email.
proxy : ProxyConfig
Configuration du proxy.
timeout : float, optional
Timeout de connexion en secondes (défaut : 30 s).
"""
def __init__(
self,
account: MailAccount,
proxy: ProxyConfig,
timeout: float = 30.0,
):
self.account = account
self.proxy = proxy
self.timeout = timeout
# ── Connexion ────────────────────────────────────────────
def _connect(self) -> ProxyIMAPClient:
imap_server = get_imap_server(self.account.login)
logger.info(
"[%s] Connexion via %s%s:993",
self.account.login, self.proxy, imap_server,
)
client = ProxyIMAPClient(
host=imap_server,
proxy=self.proxy,
use_uid=True,
ssl=True,
timeout=self.timeout,
)
client.login(self.account.login, self.account.password)
logger.info("[%s] Connecté.", self.account.login)
return client
# ── Lecture des dossiers ─────────────────────────────────
def _list_folders(self, client: ProxyIMAPClient) -> List[str]:
return [info[-1] for info in client.list_folders()]
# ── Lecture des messages ─────────────────────────────────
def _read_folder(
self,
client: ProxyIMAPClient,
folder: str,
since: Optional[datetime.datetime] = None,
) -> List[MailResult]:
results: List[MailResult] = []
since = since or datetime.datetime.today()
try:
client.select_folder(folder, readonly=True)
except Exception as exc:
logger.warning("[%s] Impossible d'ouvrir '%s' : %s",
self.account.login, folder, exc)
return results
try:
uids = client.search(["SINCE", since])
except Exception as exc:
logger.warning("[%s] Recherche échouée dans '%s' : %s",
self.account.login, folder, exc)
return results
if not uids:
return results
logger.info("[%s] %d message(s) dans '%s'",
self.account.login, len(uids), folder)
for uid, msg_data in client.fetch(uids, "RFC822").items():
try:
raw = msg_data.get(b"RFC822") or msg_data.get("RFC822")
if raw is None:
continue
em = email.message_from_bytes(raw)
subject = em.get("Subject", "")
from_addr = em.get("From", "")
to_addr = em.get("To", self.account.login)
# Filtrer : on ne garde que les emails de validation Hermes
is_validation = (
VALIDATION_URL_SUBJECT_FR in subject
or VALIDATION_URL_SUBJECT_EN in subject
or "no-reply@hermes.com" in from_addr.lower()
)
if not is_validation:
continue
body = extract_body(em)
urls = find_validation_urls(body)
result = MailResult(
account=self.account.login,
subject=subject,
from_address=from_addr,
to_address=to_addr,
body=body,
validation_urls=urls,
)
results.append(result)
logger.info(
"[%s] Email de validation trouvé (uid=%s) — URLs : %s",
self.account.login, uid, urls or "aucune",
)
except Exception as exc:
logger.warning(
"[%s] Erreur traitement uid=%s : %s",
self.account.login, uid, exc,
)
return results
# ── Point d'entrée public ────────────────────────────────
def read(
self,
since: Optional[datetime.datetime] = None,
skip_folders: Optional[List[str]] = None,
) -> List[MailResult]:
"""
Se connecte au serveur IMAP via le proxy et retourne la liste
des emails de validation trouvés depuis `since` (aujourd'hui par défaut).
Paramètres
----------
since : datetime, optional — date de début de recherche
skip_folders : list[str], optional — dossiers à ignorer
(défaut : ["Sent", "Drafts", "Trash", "Junk", "Spam"])
"""
if skip_folders is None:
skip_folders = ["Sent", "Drafts", "Trash", "Junk", "Spam"]
all_results: List[MailResult] = []
client = self._connect()
try:
folders = self._list_folders(client)
logger.info("[%s] Dossiers : %s", self.account.login, folders)
for folder in folders:
if folder in skip_folders:
logger.debug("[%s] Dossier ignoré : %s",
self.account.login, folder)
continue
all_results.extend(self._read_folder(client, folder, since))
finally:
try:
client.logout()
except Exception:
pass
return all_results
# ──────────────────────────────────────────────────────────────
# Lecture parallèle de plusieurs comptes
# ──────────────────────────────────────────────────────────────
from concurrent.futures import ThreadPoolExecutor, as_completed
def read_multiple_accounts(
accounts: List[MailAccount],
proxy: ProxyConfig,
since: Optional[datetime.datetime] = None,
max_workers: int = 10,
timeout: float = 30.0,
) -> List[MailResult]:
"""
Lit plusieurs comptes email en parallèle via le même proxy.
Retourne la liste consolidée de tous les MailResult trouvés.
"""
all_results: List[MailResult] = []
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_map = {
executor.submit(
ProxyMailReader(acc, proxy, timeout).read, since
): acc.login
for acc in accounts
}
for future in as_completed(future_map):
login = future_map[future]
try:
results = future.result()
logger.info("[%s] %d email(s) de validation récupéré(s).",
login, len(results))
all_results.extend(results)
except Exception as exc:
logger.error("[%s] Erreur : %s", login, exc)
return all_results
# ──────────────────────────────────────────────────────────────
# Point d'entrée — exemple d'utilisation
# ──────────────────────────────────────────────────────────────
if __name__ == "__main__":
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-8s %(message)s",
datefmt="%H:%M:%S",
)
# ── 1. Configurer le proxy ───────────────────────────────
proxy = ProxyConfig(
host=os.environ.get("GMX_PROXY_HOST", ""),
port=int(os.environ.get("GMX_PROXY_PORT", "443")),
proxy_type=os.environ.get("GMX_PROXY_TYPE", "SOCKS5"),
username=os.environ.get("GMX_PROXY_USERNAME"),
password=os.environ.get("GMX_PROXY_PASSWORD"),
)
# ── 2. Définir les comptes à lire ────────────────────────
accounts = [
MailAccount(login="birgitnaya@gmx.net", password="XEeUF3Y1yaO"),
# MailAccount(login="user@gmail.com", password="apppassword"),
# MailAccount(login="user@outlook.com", password="password"),
]
# ── 3. Lancer la lecture ─────────────────────────────────
results = read_multiple_accounts(
accounts=accounts,
proxy=proxy,
since=datetime.datetime.today(),
max_workers=5,
timeout=30.0,
)
# ── 4. Afficher les résultats ────────────────────────────
print(f"\n{'='*60}")
print(f" {len(results)} email(s) de validation trouvé(s)")
print(f"{'='*60}\n")
for r in results:
print(f" Compte : {r.account}")
print(f" De : {r.from_address}")
print(f" Sujet : {r.subject}")
print(f" URLs : {r.validation_urls or 'aucune'}")
print(f" {'-'*56}")
+137 -18
View File
@@ -1,19 +1,25 @@
import datetime import datetime
import email import email
import logging import logging
import os
import re import re
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from email.header import decode_header from email.header import decode_header
from email.message import Message from email.message import Message
from typing import Union, List from typing import Union, List, Optional
from dotenv import load_dotenv
from imapclient import IMAPClient from imapclient import IMAPClient
from db.mongo_manager import MONGO_STORE_MANAGER from db.mongo_manager import MONGO_STORE_MANAGER
from excel_reader import read_contacts from excel_reader import read_contacts
from mail.mail_constants import DOMAIN_HOTMAIL, create_imap from mail.mail_constants import DOMAIN_HOTMAIL, create_imap
from mail.imap_proxy_reader import ProxyIMAPClient, ProxyConfig, get_imap_server
from models.ReserveResultPojo import ReserveResultPojo from models.ReserveResultPojo import ReserveResultPojo
from models.mail_pojo import MailPojo, MailAddress from models.mail_pojo import MailPojo, MailAddress
# Charger les variables d'environnement depuis .env
load_dotenv()
# 定义常量 # 定义常量
VALIDATION_URL_SUBJECT_FR = 'Validation de votre demande de rendez-vous' VALIDATION_URL_SUBJECT_FR = 'Validation de votre demande de rendez-vous'
VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request' VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request'
@@ -25,6 +31,30 @@ EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
# 日期格式 # 日期格式
DATE_FORMAT = "%d-%b-%Y" DATE_FORMAT = "%d-%b-%Y"
# Timeouts GMX (en secondes)
IMAP_SOCKET_TIMEOUT = 300 # timeout socket pour chaque opération IMAP
FUTURE_TIMEOUT = 600 # durée max allouée à la lecture d'une boîte mail
# GMX域名列表(用于判断是否需要使用代理)
GMX_DOMAINS = (
"gmx.com", "gmx.net", "gmx.de", "gmx.at",
"gmx.fr", "gmx.us", "gmx.sg", "gmx.ch", "gmx.pt",
)
# 需要通过代理读取的域名列表
# PROXY_DOMAINS = GMX_DOMAINS + ("inbox.lv",)
PROXY_DOMAINS = GMX_DOMAINS
def is_gmx_account(login: str) -> bool:
"""判断邮箱是否属于GMX域名"""
return any(d in login.lower() for d in GMX_DOMAINS)
def is_proxy_account(login: str) -> bool:
"""判断邮箱是否需要通过代理读取(GMX 或 inbox.lv"""
return any(d in login.lower() for d in PROXY_DOMAINS)
# 邮箱列表(简化为常量) # 邮箱列表(简化为常量)
REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com" REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com"
@@ -64,9 +94,12 @@ def find_from_mail(param) -> str:
class MailReader: class MailReader:
"""邮件读取器类""" """邮件读取器类"""
def __init__(self, login: str, password: str): def __init__(self, login: str, password: str, proxy: Optional[ProxyConfig] = None,
failed_gmx_list: Optional[List[str]] = None):
self.login = login self.login = login
self.password = password self.password = password
self.proxy = proxy
self.failed_gmx_list = failed_gmx_list if failed_gmx_list is not None else []
@staticmethod @staticmethod
def show_folders(imap) -> List[str]: def show_folders(imap) -> List[str]:
@@ -90,7 +123,45 @@ class MailReader:
def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]: def read_emails(self, mails_messages: List[MailPojo]) -> List[MailPojo]:
"""读取邮件""" """读取邮件"""
imap = create_imap(self.login) # ── GMX / inbox.lv 账户 → 使用代理连接(失败自动重试最多3次)──
if is_proxy_account(self.login) and self.proxy is not None:
return self._read_emails_with_proxy_retry(mails_messages)
else:
return self._read_emails_internal(create_imap(self.login), mails_messages)
def _read_emails_with_proxy_retry(
self,
mails_messages: List[MailPojo],
max_retries: int = 8,
) -> List[MailPojo]:
"""通过 ProxyIMAPClient 读取邮件(GMX / inbox.lv),失败时最多重试 max_retries 次。"""
imap_server = get_imap_server(self.login)
last_error: Optional[Exception] = None
for attempt in range(1, max_retries + 1):
try:
print("[Proxy] {}{} via {} (tentative {}/{})".format(
self.login, imap_server, self.proxy, attempt, max_retries))
imap = ProxyIMAPClient(
host=imap_server,
proxy=self.proxy,
use_uid=True,
ssl=True,
timeout=IMAP_SOCKET_TIMEOUT,
)
return self._read_emails_internal(imap, mails_messages)
except Exception as exc:
last_error = exc
print("[Proxy] Échec tentative {}/{} pour {} : {}".format(
attempt, max_retries, self.login, exc))
print("[Proxy] Toutes les tentatives ont échoué pour {} : {}".format(
self.login, last_error))
self.failed_gmx_list.append(self.login)
return []
def _read_emails_internal(self, imap, mails_messages: List[MailPojo]) -> List[MailPojo]:
"""Logique commune de lecture des emails (IMAPClient ou imaplib)."""
is_imap_client = isinstance(imap, IMAPClient) is_imap_client = isinstance(imap, IMAPClient)
# 登录邮箱 # 登录邮箱
@@ -337,10 +408,6 @@ def need_to_check_email(mail: str, successful_items) -> bool:
"""判断是否需要检查邮件""" """判断是否需要检查邮件"""
print("successful_items size is " + str(len(successful_items))) print("successful_items size is " + str(len(successful_items)))
# 特殊处理
if mail == "saigecong1990@pissmail.com":
return True
# 过滤已验证的项目 # 过滤已验证的项目
filtered_items = [item for item in successful_items if item.email == mail] filtered_items = [item for item in successful_items if item.email == mail]
@@ -351,34 +418,50 @@ def need_to_check_email(mail: str, successful_items) -> bool:
return len(validated_items) == 0 return len(validated_items) == 0
def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger) -> None: def find_links_to_validate_from_mail_list(
"""从邮件列表中查找需要验证的链接""" mail_list: List[MailAddress],
logger,
proxy: Optional[ProxyConfig] = None,
) -> List[str]:
"""从邮件列表中查找需要验证的链接,返回读取失败的GMX账户列表"""
if not mail_list: if not mail_list:
return return []
# 检查时间前开始检查邮件 # 检查时间前开始检查邮件
contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list() contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list()
successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
mails_messages = [] mails_messages = []
failed_gmx: List[str] = []
# 使用线程池处理邮件 # 使用线程池处理邮件
with ThreadPoolExecutor(max_workers=20) as executor: with ThreadPoolExecutor(max_workers=100) as executor:
futures = [] futures = []
for mail in mail_list: for mail in mail_list:
# 检查是否需要读取邮件 # 检查是否需要读取邮件
if need_to_check_email(mail.mail, successful_items): if need_to_check_email(mail.mail, successful_items):
mail_reader = MailReader(mail.mail, mail.password) mail_reader = MailReader(mail.mail, mail.password, proxy=proxy,
failed_gmx_list=failed_gmx)
future = executor.submit(mail_reader.read_emails, mails_messages) future = executor.submit(mail_reader.read_emails, mails_messages)
futures.append(future) futures.append(future)
# 等待所有任务完成 # 等待所有任务完成
for future in futures: for future in futures:
try: try:
future.result() future.result(timeout=FUTURE_TIMEOUT)
except TimeoutError:
print("⏱️ Timeout ({} s) dépassé pour une boîte mail — lecture ignorée.".format(FUTURE_TIMEOUT))
except Exception as e: except Exception as e:
print("Error processing mail: {}".format(e)) print("Error processing mail: {},login: {}, password: {}".format(e,mail.mail, mail.password))
# ── Résumé des comptes proxy en échec ──────────────────────
if failed_gmx:
print("\n[Proxy] ⚠️ {} compte(s) non lus (GMX / inbox.lv) :".format(len(failed_gmx)))
for addr in failed_gmx:
print("{}".format(addr))
else:
print("\n[Proxy] ✅ Tous les comptes GMX / inbox.lv ont été lus avec succès.")
# 刷新成功的项目 # 刷新成功的项目
_refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
@@ -406,12 +489,24 @@ def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger)
else: else:
logger.info("do not need to click url --> {}".format(mail.mail_address)) logger.info("do not need to click url --> {}".format(mail.mail_address))
return failed_gmx
# 主函数 # 主函数
if __name__ == '__main__': if __name__ == '__main__':
# 读取联系人列表 # 读取联系人列表
contact_to_book_list = read_contacts( contact_to_book_list = read_contacts(
file_name="~/Desktop/contact_list_2025-11-06.xlsx") # file_name="~/Desktop/contact_list_inbox_lv_100.xlsx")
# file_name="~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx")
# file_name="~/Desktop/contact_list_yahoo_100_20_04.xlsx")
# file_name="~/Desktop/contact_yahoo_5.xlsx")
file_name="~/Desktop/contact_list_2026-04-22.xlsx")
# file_name="~/Desktop/contact_list_2026-04-11.xlsx")
# file_name="~/Desktop/contact_list_2026-04-17.xlsx")
# file_name="~/Desktop/contact_list_inbox_100_14_04.xlsx")
# file_name="~/Desktop/contact_list_2024-09-02_firemail_de_100.xlsx")
# file_name="~/Desktop/reste_inbox_lv.xlsx")
# file_name="~/Desktop/yahooo_list.xlsx")
# 获取目标邮箱列表 # 获取目标邮箱列表
all_mail_list = MONGO_STORE_MANAGER.get_destination_emails() all_mail_list = MONGO_STORE_MANAGER.get_destination_emails()
@@ -425,7 +520,6 @@ if __name__ == '__main__':
# 设置日志记录器 # 设置日志记录器
logger = logging.getLogger() logger = logging.getLogger()
# 获取已验证的链接列表 # 获取已验证的链接列表
_all_links = MONGO_STORE_MANAGER.get_links_to_validate() _all_links = MONGO_STORE_MANAGER.get_links_to_validate()
@@ -438,6 +532,31 @@ if __name__ == '__main__':
_to_add = False _to_add = False
if _to_add: if _to_add:
filter_mail.append(mail_pojo) filter_mail.append(mail_pojo)
# filter_mail = [MailAddress("utatapi@gmx.net", "RSAzHAFek8s")] # filter_mail = [MailAddress("minnakan@firemail.de", "Yjn8nQ0sZ")]
# ── Mode de lecture : GMX_ONLY=true → uniquement les comptes GMX ──
gmx_only = os.environ.get("GMX_ONLY", "false").strip().lower() == "true"
if gmx_only:
filter_mail = [m for m in filter_mail if is_gmx_account(m.mail)]
print("[Mode] Lecture GMX uniquement ({} comptes)".format(len(filter_mail)))
else:
print("[Mode] Lecture de tous les comptes ({} comptes)".format(len(filter_mail)))
# 配置代理(GMX账号必须通过代理读取)
gmx_proxy = ProxyConfig(
host=os.environ.get("GMX_PROXY_HOST", ""),
port=int(os.environ.get("GMX_PROXY_PORT", "443")),
proxy_type=os.environ.get("GMX_PROXY_TYPE", "SOCKS5"),
username=os.environ.get("GMX_PROXY_USERNAME"),
password=os.environ.get("GMX_PROXY_PASSWORD"),
)
# 处理邮件 # 处理邮件
find_links_to_validate_from_mail_list(filter_mail, logger) failed = find_links_to_validate_from_mail_list(filter_mail, logger, proxy=gmx_proxy)
# ── Afficher les comptes GMX non lus ─────────────────────
if failed:
print("\n===== Comptes GMX non lus ({}) =====".format(len(failed)))
for addr in failed:
print("{}".format(addr))
else:
print("\n===== Tous les comptes GMX ont été lus avec succès =====")
+6 -5
View File
@@ -1,13 +1,14 @@
from multiprocessing import Process from multiprocessing import Process
from queue_message.CookiesPublisher import MORNING_DATA_CACHE_2, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK from queue_message.CookiesPublisher import MORNING_DATA_CACHE_2, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK, \
REGISTER_QUEUE
from workers.MessagerTransporter import migrate_message_to_queue from workers.MessagerTransporter import migrate_message_to_queue
if __name__ == '__main__': if __name__ == '__main__':
# p1 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE_2, MORNING_DATA_CACHE_BAK)) p1 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK))
# p1.start() p1.start()
p2 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK)) p2 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE_2, MORNING_DATA_CACHE_BAK))
p2.start() p2.start()
# p2.join() p2.join()
# migrate_message_to_queue(from_queue=MORNING_DATA_CACHE_2) # migrate_message_to_queue(from_queue=MORNING_DATA_CACHE_2)
# migrate_message_to_queue(from_queue=MORNING_DATA_CACHE) # migrate_message_to_queue(from_queue=MORNING_DATA_CACHE)
+1 -1
View File
@@ -93,7 +93,7 @@ class ProxyManager:
return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2, FR_DATA_IMPULSE_RES, FR_ASOCKS_MOBILE_PROXY] return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2, FR_DATA_IMPULSE_RES, FR_ASOCKS_MOBILE_PROXY]
def get_proxy_for_appointment_request(self) -> dict: def get_proxy_for_appointment_request(self) -> dict:
# return self.get_random_sticky_iproyal_proxy() return self.get_random_sticky_iproyal_proxy()
_chosen_proxy = random.choice(MOBILE_PROXY_LIST) _chosen_proxy = random.choice(MOBILE_PROXY_LIST)
if "oxylabs" in _chosen_proxy["http"]: if "oxylabs" in _chosen_proxy["http"]:
self.logger.info("use oxylabs proxy") self.logger.info("use oxylabs proxy")
+12 -7
View File
@@ -99,11 +99,16 @@ def send_request_for_file_list(file_list: list, thread_number: int = 20, data_qu
if __name__ == '__main__': if __name__ == '__main__':
# file_list = ['~/Desktop/contact_list_2024-05-23.xlsx', # file_list = ['~/Desktop/contact_list_2024-05-23.xlsx',
# '~/Desktop/contact_list_2024-05-21.xlsx', # '~/Desktop/contact_list_2024-05-21.xlsx',
# '~/Desktop/15_05_to_test.xlsx'] # file_list = ['~/Desktop/contact_list_2026-04-15.xlsx']
# file_list = ['~/Desktop/contact_list_2025-10-30.xlsx'] # file_list = ['~/Desktop/contact_yahoo_5.xlsx']
file_list = ['~/Desktop/contact_list_2025-11-06.xlsx'] # file_list = ['~/Desktop/contact_list_inbox_lv_100.xlsx']
# file_list = ['~/Desktop/contact_list_all.xlsx'] # file_list = ['~/Desktop/contact_list_yahoo_100_20_04.xlsx']
# file_list = ['~/Desktop/contact_list_2025-09-08.xlsx'] # file_list = ['~/Desktop/contact_list_2026-04-21_200_yahoo.xlsx']
# file_list = ['~/Desktop/real_name_contacts_100_aol_17_04.xlsx'] file_list = ['~/Desktop/contact_list_2026-04-21.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=30, # file_list = ['~/Desktop/reste_inbox_lv.xlsx']
# file_list = ['~/Desktop/contact_list_2024-09-02_firemail_de_100.xlsx']
# file_list = ['~/Desktop/contact_list_inbox_100_14_04.xlsx']
# file_list = file_list[0:100]
# file_list = ['~/Desktop/contact_list_2026-03-28.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=2,
data_queue_name=MORNING_DATA_CACHE, stop_at_hour=19, stop_at_mins=50) data_queue_name=MORNING_DATA_CACHE, stop_at_hour=19, stop_at_mins=50)
+3
View File
@@ -3,3 +3,6 @@ curl_cffi==0.7.1
openpyxl openpyxl
pika pika
schedule schedule
python-dotenv
PySocks
imapclient
+2 -2
View File
@@ -5,8 +5,8 @@ from link_validator_executor import start_link_validation
def start_check_results_job(sched): def start_check_results_job(sched):
sched.add_job(start_link_validation, 'cron', day_of_week='mon-sat', hour='13', sched.add_job(start_link_validation, 'cron', day_of_week='mon-sun', hour='14',
minute='48', minute='10',
misfire_grace_time=10, misfire_grace_time=10,
second='10', timezone='Europe/Paris', max_instances=1, args=[]) second='10', timezone='Europe/Paris', max_instances=1, args=[])
+3 -3
View File
@@ -7,13 +7,13 @@ from request_sender_test import send_request_for_file_list
def start_book_appointment(): def start_book_appointment():
# file_list = ['~/Desktop/contact_list_2025-09-08.xlsx'] # file_list = ['~/Desktop/contact_list_2025-09-08.xlsx']
file_list = ['~/Desktop/contact_list_2025-11-06.xlsx'] file_list = ['~/Desktop/contact_list_2025-11-28.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=70, send_request_for_file_list(file_list=file_list, thread_number=73,
data_queue_name=MORNING_DATA_CACHE, stop_at_hour=11, stop_at_mins=10) data_queue_name=MORNING_DATA_CACHE, stop_at_hour=11, stop_at_mins=10)
def start_check_results_job(sched): def start_check_results_job(sched):
sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sat', hour='10', sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sun', hour='10',
minute='30', minute='30',
misfire_grace_time=10, misfire_grace_time=10,
second='10', timezone='Europe/Paris', max_instances=1, args=[]) second='10', timezone='Europe/Paris', max_instances=1, args=[])
+162
View File
@@ -0,0 +1,162 @@
import subprocess
import os
import datetime
import sys
from db.mongo_manager import MONGO_DB_URL
# ================= 配置区域 =================
# 数据库连接信息
MONGO_HOST = "mongo.lpaconsulting.fr"
MONGO_PORT = "27017"
MONGO_DB_NAME = "appointment" # 你要备份/恢复的数据库名
# Get MongoDB credentials from environment variables
MONGO_USER = os.getenv(
"MONGO_USER", "appointment"
) # Default to 'appointment' if not set
MONGO_PASS = os.getenv("MONGO_PASS", "Rdv@2022") # Default to 'Rdv@2022' if not set
# 备份存放的根目录
BACKUP_DIR_ROOT = "./mongo_backups"
# ===========================================
def get_auth_args():
"""构建认证参数列表"""
args = []
if MONGO_USER and MONGO_PASS:
args.extend(
[
"--username",
MONGO_USER,
"--password",
MONGO_PASS,
"--authenticationDatabase",
"appointment",
]
)
return args
def backup_mongo():
"""执行备份操作"""
# 1. 创建带有时间戳的备份文件夹
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
backup_path = os.path.join(BACKUP_DIR_ROOT, timestamp)
if not os.path.exists(backup_path):
os.makedirs(backup_path)
print(f"[*] 开始备份数据库: {MONGO_DB_NAME}{backup_path} ...")
# 2. 构建 mongodump 命令
# 命令格式: mongodump --host <host> --port <port> --db <db> --out <path> [auth]
cmd = [
"mongodump",
"--host",
MONGO_HOST,
"--port",
MONGO_PORT,
"--db",
MONGO_DB_NAME,
"--out",
backup_path,
]
# 添加认证参数
cmd.extend(get_auth_args())
try:
# 3. 执行命令
result = subprocess.run(cmd, check=True, text=True, capture_output=True)
print(f"[+] 备份成功!")
print(f" 存储路径: {backup_path}")
print(f" 日志: {result.stderr}") # mongodump 通常把进度输出到 stderr
return backup_path
except subprocess.CalledProcessError as e:
print(f"[-] 备份失败: {e}")
print(f" 错误信息: {e.stderr}")
return None
def restore_mongo(backup_source_path):
"""
执行恢复操作
backup_source_path: 备份文件夹的路径 (例如 ./mongo_backups/2023-10-27_10-00-00)
"""
# mongodump 的输出结构通常是: backup_dir/db_name/collection.bson
# 所以我们需要指向具体的数据库文件夹,或者指向父文件夹并指定 --db
target_dir = os.path.join(backup_source_path, MONGO_DB_NAME)
if not os.path.exists(target_dir):
print(
f"[-] 错误: 在路径 {backup_source_path} 下找不到数据库 {MONGO_DB_NAME} 的备份文件。"
)
return
print(f"[*] 开始恢复数据库: {MONGO_DB_NAME}{target_dir} ...")
# 构建 mongorestore 命令
# 命令格式: mongorestore --host <host> --port <port> --db <db> <path_to_bson_files> [auth]
cmd = [
"mongorestore",
"--host",
MONGO_HOST,
"--port",
MONGO_PORT,
"--db",
MONGO_DB_NAME,
"--drop", # 警告:这会在恢复前删除现有集合,确保数据干净。根据需要移除此项。
target_dir,
]
cmd.extend(get_auth_args())
try:
result = subprocess.run(cmd, check=True, text=True, capture_output=True)
print(f"[+] 恢复成功!")
print(f" 日志: {result.stderr}")
except subprocess.CalledProcessError as e:
print(f"[-] 恢复失败: {e}")
print(f" 错误信息: {e.stderr}")
# ================= 主程序入口 =================
if __name__ == "__main__":
print("请选择操作:")
print("1. 备份数据库 (Backup)")
print("2. 恢复数据库 (Restore)")
choice = input("请输入数字 (1/2): ").strip()
if choice == "1":
backup_mongo()
elif choice == "2":
# 列出所有备份供用户选择
if not os.path.exists(BACKUP_DIR_ROOT):
print("[-] 没有找到备份目录。")
else:
backups = sorted(os.listdir(BACKUP_DIR_ROOT))
if not backups:
print("[-] 目录为空。")
else:
print("\n可用备份:")
for idx, name in enumerate(backups):
print(f"{idx + 1}. {name}")
try:
idx_choice = int(input("\n请选择要恢复的备份编号: ")) - 1
if 0 <= idx_choice < len(backups):
selected_backup = os.path.join(
BACKUP_DIR_ROOT, backups[idx_choice]
)
restore_mongo(selected_backup)
else:
print("[-] 无效的选择。")
except ValueError:
print("[-] 请输入数字。")
else:
print("[-] 无效输入,退出。")
+6 -6
View File
@@ -14,13 +14,13 @@ from models.jsdata_pojo import JsDataPojo
from models.result_pojo import RequestResult from models.result_pojo import RequestResult
from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \ from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \
retain_only_dataome_cookies retain_only_dataome_cookies
from workers.proxy_constants import PROXY_TIMEOUT_S
API_KEY = "d66aaf490d8aa424a5175e1fbd1aadea" API_KEY = "d66aaf490d8aa424a5175e1fbd1aadea"
HOST_ADDRESS = "https://api.2captcha.com/createTask" HOST_ADDRESS = "https://api.2captcha.com/createTask"
HERMES_REGISTER = "https://rendezvousparis.hermes.com/client/register" HERMES_REGISTER = "https://rendezvousparis.hermes.com/client/register"
class CaptchaResultGetter: class CaptchaResultGetter:
def __init__(self): def __init__(self):
@@ -51,7 +51,7 @@ class CaptchaResultGetter:
print(proxy_to_use) print(proxy_to_use)
try: try:
response = requests.get(url=HERMES_REGISTER, headers=headers, verify=False, proxies=proxy_to_use, response = requests.get(url=HERMES_REGISTER, headers=headers, verify=False, proxies=proxy_to_use,
timeout=15) timeout=PROXY_TIMEOUT_S)
print(response.status_code) print(response.status_code)
if response.status_code == 200: if response.status_code == 200:
print(response.text) print(response.text)
@@ -94,7 +94,7 @@ class CaptchaResultGetter:
raw_data = self.get_le_type_raw_data(old_valid_cookie=old_valid_cookie, js_le_type_data=js_le_type_data) raw_data = self.get_le_type_raw_data(old_valid_cookie=old_valid_cookie, js_le_type_data=js_le_type_data)
response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False, response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False,
data=raw_data, data=raw_data,
proxies=proxy_to_use, timeout=15) proxies=proxy_to_use, timeout=PROXY_TIMEOUT_S)
# print(response.status_code) # print(response.status_code)
if response.status_code == 200: if response.status_code == 200:
print(response.text) print(response.text)
@@ -109,7 +109,7 @@ class CaptchaResultGetter:
return None return None
def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str:
_tag_version = "5.1.9" _tag_version = "5.1.12"
_jspl = encrpte_to_jspl(js_data.to_url_encoded_json()) _jspl = encrpte_to_jspl(js_data.to_url_encoded_json())
_raw_data = "jspl={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( _raw_data = "jspl={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format(
_jspl, old_valid_cookie, _tag_version) _jspl, old_valid_cookie, _tag_version)
@@ -149,7 +149,7 @@ class CaptchaResultGetter:
response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False, response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False,
data=self.get_ch_raw_data_from_js_data(js_data=js_data, data=self.get_ch_raw_data_from_js_data(js_data=js_data,
old_valid_cookie=_cookies_to_use), old_valid_cookie=_cookies_to_use),
proxies=proxy_to_use, timeout=15) proxies=proxy_to_use, timeout=PROXY_TIMEOUT_S)
print(response.status_code) print(response.status_code)
if response.status_code == 200: if response.status_code == 200:
print(response.text) print(response.text)
@@ -180,7 +180,7 @@ class CaptchaResultGetter:
# old_valid_cookie=old_valid_cookie) # old_valid_cookie=old_valid_cookie)
_cid = get_datadome_cookies(old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie)
_jspl = encrpte_to_jspl(js_le_type_data.to_url_encoded_json()) _jspl = encrpte_to_jspl(js_le_type_data.to_url_encoded_json())
_raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.9".format( _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.12".format(
_jspl, mousemove_count, click_count, scroll_count, touch_count, touch_count, _jspl, mousemove_count, click_count, scroll_count, touch_count, touch_count,
touch_move, touch_move,
key_count, key_count,
+1
View File
@@ -0,0 +1 @@
PROXY_TIMEOUT_S = 20
+2 -1
View File
@@ -13,6 +13,7 @@ from models.result_pojo import RequestResult
from queue_message.CookiesPublisher import CookiesPublisher from queue_message.CookiesPublisher import CookiesPublisher
from utils.address_ip import get_address_ip from utils.address_ip import get_address_ip
from workers.captcha_result_getter import CaptchaResultGetter from workers.captcha_result_getter import CaptchaResultGetter
from workers.proxy_constants import PROXY_TIMEOUT_S
def get_chrome_version_from_ua(ua): def get_chrome_version_from_ua(ua):
@@ -206,7 +207,7 @@ class Sender:
proxy_to_use = self.proxy_to_use proxy_to_use = self.proxy_to_use
# print(proxy_to_use) # print(proxy_to_use)
response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data, response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data,
timeout=15, allow_redirects=False, impersonate="chrome99_android") timeout=PROXY_TIMEOUT_S, allow_redirects=False, impersonate="chrome99_android")
self.logger.info(response.status_code) self.logger.info(response.status_code)
if response.status_code == 302: if response.status_code == 302:
# add to mongodb # add to mongodb