Compare commits

...

16 Commits

10 changed files with 288 additions and 63 deletions
+94 -33
View File
@@ -1,6 +1,7 @@
import datetime
import logging
import time
import os
from pymongo import MongoClient
@@ -11,7 +12,7 @@ from models.contact_pojo import ContactPojo
from models.mail_pojo import MailAddress
from models.regisered_user_pojo import RegisteredUserPojo
MONGO_DB_URL = "mongodb://mongo.lpaconsulting.fr/?timeoutMS=100000"
MONGO_DB_URL = "mongodb://mongo2.lpaconsulting.fr/?timeoutMS=100000"
CAPTCHA_ERROR_COLLECTION_PREFIX = "CAPTCHA_ERROR_"
BLACK_LIST = "BLACK_LIST"
ACCEPTED_APPOINTMENT_LIST = "ACCEPTED_APPOINTMENT_LIST"
@@ -25,7 +26,22 @@ CONTACT_LIST_SERIAL_MAP = "CONTACT_LIST_SERIAL_MAP"
class MongoDbManager:
def __init__(self):
client = MongoClient(MONGO_DB_URL, username='appointment', password='Rdv@2022', authSource='appointment')
# Get username and password from environment variables
mongo_username = os.getenv("MONGO_USERNAME")
mongo_password = os.getenv("MONGO_PASSWORD")
# Validate that environment variables exist
if not mongo_username or not mongo_password:
raise ValueError(
"MONGO_USERNAME and MONGO_PASSWORD environment variables must be set"
)
client = MongoClient(
MONGO_DB_URL,
username=mongo_username,
password=mongo_password,
authSource="appointment",
)
self.db = client.appointment
self.logger = logging.getLogger("mongoDb")
@@ -36,8 +52,13 @@ class MongoDbManager:
def insert_reserve_result(self, collection_name, reserve: ReserveResultPojo):
try:
collection_to_use = self.db[collection_name]
collection_to_use.replace_one(filter={'_id': reserve.id, }, replacement=reserve.to_firestore_dict(),
upsert=True)
collection_to_use.replace_one(
filter={
"_id": reserve.id,
},
replacement=reserve.to_firestore_dict(),
upsert=True,
)
except Exception as Error:
self.logger.info(Error)
@@ -83,8 +104,14 @@ class MongoDbManager:
result_list.append(ContactPojo.from_firestore_dict(document))
return result_list
def save_links_to_validate(self, link: str, mail_address: str, model: str,
_all_contact_list: list, _used_ip: str = ""):
def save_links_to_validate(
self,
link: str,
mail_address: str,
model: str,
_all_contact_list: list,
_used_ip: str = "",
):
collection_to_use = self.db[LINKS_TO_VALIDATE]
updated_at = time.strftime("%H:%M:%S", time.localtime())
_ip_country = "FR"
@@ -95,32 +122,42 @@ class MongoDbManager:
_ip_country = _contact.ip_country
if len(mail_address) > 0:
collection_to_use.replace_one(filter={'_id': mail_address, }, replacement={
u'url': link,
u'email': mail_address,
u'serial': serial,
u'model': model,
u'ip_country': _ip_country,
u'_used_ip': _used_ip,
"updated_at": updated_at
collection_to_use.replace_one(
filter={
"_id": mail_address,
},
upsert=True)
replacement={
"url": link,
"email": mail_address,
"serial": serial,
"model": model,
"ip_country": _ip_country,
"_used_ip": _used_ip,
"updated_at": updated_at,
},
upsert=True,
)
else:
collection_to_use.replace_one(filter={'_id': link, }, replacement={
u'url': link,
u'serial': serial,
u'model': model,
u'ip_country': _ip_country,
u'_used_ip': _used_ip,
"updated_at": updated_at
collection_to_use.replace_one(
filter={
"_id": link,
},
upsert=True)
replacement={
"url": link,
"serial": serial,
"model": model,
"ip_country": _ip_country,
"_used_ip": _used_ip,
"updated_at": updated_at,
},
upsert=True,
)
def get_code_for_email(self, email: str):
collection_name = DESTINATION_EMAIL_LIST
try:
collection_to_use = self.db[collection_name]
mailDocument = collection_to_use.find_one(filter={'_id': email})
mailDocument = collection_to_use.find_one(filter={"_id": email})
if mailDocument is not None:
return MailAddress.from_firestore_dict(mailDocument).password
else:
@@ -134,7 +171,9 @@ class MongoDbManager:
_cursor = self.db[_collection_name]
registered_user_list = []
for document in _cursor.find():
registered_user_list.append(RegisteredUserPojo.from_firestore_dict(document))
registered_user_list.append(
RegisteredUserPojo.from_firestore_dict(document)
)
return registered_user_list
def get_destination_emails(self) -> list:
@@ -167,8 +206,18 @@ class MongoDbManager:
self.logger.info(error)
return link_list
def link_validated_for_result(self, link: str, linkPojo: LinkPojo, state=True, is_duplicated=False,
is_invalid=False, segement_position=1, ua="", model="", timestamp_in_s: list = None):
def link_validated_for_result(
self,
link: str,
linkPojo: LinkPojo,
state=True,
is_duplicated=False,
is_invalid=False,
segement_position=1,
ua="",
model="",
timestamp_in_s: list = None,
):
if timestamp_in_s is None:
timestamp_in_s = []
print("link_validated_for_result() called with url = " + link)
@@ -181,7 +230,10 @@ class MongoDbManager:
print("link_validated_for_result() called with id = " + _id)
collection_name = str(datetime.date.today())
print("link_validated_for_result() called with collection_name = " + collection_name)
print(
"link_validated_for_result() called with collection_name = "
+ collection_name
)
collection = self.db[collection_name]
validated_at = time.strftime("%H:%M:%S", time.localtime())
@@ -190,18 +242,27 @@ class MongoDbManager:
validated_by = "Invalid"
if is_duplicated:
validated_by = "Double"
collection.find_one_and_update({'_id': _id}, {
"$set": {"url_validated": state, "validated_at": validated_at, "id": _id, "email": linkPojo.email,
collection.find_one_and_update(
{"_id": _id},
{
"$set": {
"url_validated": state,
"validated_at": validated_at,
"id": _id,
"email": linkPojo.email,
"url": link,
"validated_by_model": model,
"serial": linkPojo.serial,
"validated_by_ua": ua,
"timestamp_in_s": "-".join(str(x) for x in timestamp_in_s),
"validated_by": validated_by}},
upsert=True)
"validated_by": validated_by,
}
},
upsert=True,
)
# remove the link from db
collection_to_use = self.db[LINKS_TO_VALIDATE]
collection_to_use.delete_one({'_id': linkPojo.email})
collection_to_use.delete_one({"_id": linkPojo.email})
MONGO_STORE_MANAGER = MongoDbManager()
+4 -2
View File
@@ -363,7 +363,7 @@ def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger)
mails_messages = []
# 使用线程池处理邮件
with ThreadPoolExecutor(max_workers=20) as executor:
with ThreadPoolExecutor(max_workers=100) as executor:
futures = []
for mail in mail_list:
@@ -411,7 +411,9 @@ def find_links_to_validate_from_mail_list(mail_list: List[MailAddress], logger)
if __name__ == '__main__':
# 读取联系人列表
contact_to_book_list = read_contacts(
file_name="~/Desktop/contact_list_2025-11-06.xlsx")
file_name="~/Desktop/contact_list_2025-11-28.xlsx")
# file_name="~/Desktop/contact_list_all.xlsx")
# file_name="~/Desktop/contact_list_2025-11-06.xlsx")
# 获取目标邮箱列表
all_mail_list = MONGO_STORE_MANAGER.get_destination_emails()
+3 -3
View File
@@ -4,10 +4,10 @@ from queue_message.CookiesPublisher import MORNING_DATA_CACHE_2, MORNING_DATA_CA
from workers.MessagerTransporter import migrate_message_to_queue
if __name__ == '__main__':
# p1 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE_2, MORNING_DATA_CACHE_BAK))
# p1.start()
p1 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE_2, MORNING_DATA_CACHE_BAK))
p1.start()
p2 = Process(target=migrate_message_to_queue, args=(MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK))
p2.start()
# p2.join()
p2.join()
# migrate_message_to_queue(from_queue=MORNING_DATA_CACHE_2)
# migrate_message_to_queue(from_queue=MORNING_DATA_CACHE)
+4 -6
View File
@@ -99,11 +99,9 @@ def send_request_for_file_list(file_list: list, thread_number: int = 20, data_qu
if __name__ == '__main__':
# file_list = ['~/Desktop/contact_list_2024-05-23.xlsx',
# '~/Desktop/contact_list_2024-05-21.xlsx',
# '~/Desktop/15_05_to_test.xlsx']
# file_list = ['~/Desktop/contact_list_2025-10-30.xlsx']
file_list = ['~/Desktop/contact_list_2025-11-06.xlsx']
file_list = ['~/Desktop/contact_list_2025-11-28.xlsx']
# file_list = ['~/Desktop/contact_list_2025-11-06.xlsx']
# file_list = ['~/Desktop/contact_list_all.xlsx']
# file_list = ['~/Desktop/contact_list_2025-09-08.xlsx']
# file_list = ['~/Desktop/real_name_contacts_100_aol_17_04.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=30,
data_queue_name=MORNING_DATA_CACHE, stop_at_hour=19, stop_at_mins=50)
send_request_for_file_list(file_list=file_list, thread_number=20,
data_queue_name=MORNING_DATA_CACHE_2, stop_at_hour=19, stop_at_mins=50)
+2 -2
View File
@@ -5,8 +5,8 @@ from link_validator_executor import start_link_validation
def start_check_results_job(sched):
sched.add_job(start_link_validation, 'cron', day_of_week='mon-sat', hour='13',
minute='48',
sched.add_job(start_link_validation, 'cron', day_of_week='mon-sun', hour='14',
minute='10',
misfire_grace_time=10,
second='10', timezone='Europe/Paris', max_instances=1, args=[])
+3 -3
View File
@@ -7,13 +7,13 @@ from request_sender_test import send_request_for_file_list
def start_book_appointment():
# file_list = ['~/Desktop/contact_list_2025-09-08.xlsx']
file_list = ['~/Desktop/contact_list_2025-11-06.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=70,
file_list = ['~/Desktop/contact_list_2025-11-28.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=73,
data_queue_name=MORNING_DATA_CACHE, stop_at_hour=11, stop_at_mins=10)
def start_check_results_job(sched):
sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sat', hour='10',
sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sun', hour='10',
minute='30',
misfire_grace_time=10,
second='10', timezone='Europe/Paris', max_instances=1, args=[])
+162
View File
@@ -0,0 +1,162 @@
import subprocess
import os
import datetime
import sys
from db.mongo_manager import MONGO_DB_URL
# ================= 配置区域 =================
# 数据库连接信息
MONGO_HOST = "mongo.lpaconsulting.fr"
MONGO_PORT = "27017"
MONGO_DB_NAME = "appointment" # 你要备份/恢复的数据库名
# Get MongoDB credentials from environment variables
MONGO_USER = os.getenv(
"MONGO_USER", "appointment"
) # Default to 'appointment' if not set
MONGO_PASS = os.getenv("MONGO_PASS", "Rdv@2022") # Default to 'Rdv@2022' if not set
# 备份存放的根目录
BACKUP_DIR_ROOT = "./mongo_backups"
# ===========================================
def get_auth_args():
"""构建认证参数列表"""
args = []
if MONGO_USER and MONGO_PASS:
args.extend(
[
"--username",
MONGO_USER,
"--password",
MONGO_PASS,
"--authenticationDatabase",
"appointment",
]
)
return args
def backup_mongo():
"""执行备份操作"""
# 1. 创建带有时间戳的备份文件夹
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
backup_path = os.path.join(BACKUP_DIR_ROOT, timestamp)
if not os.path.exists(backup_path):
os.makedirs(backup_path)
print(f"[*] 开始备份数据库: {MONGO_DB_NAME}{backup_path} ...")
# 2. 构建 mongodump 命令
# 命令格式: mongodump --host <host> --port <port> --db <db> --out <path> [auth]
cmd = [
"mongodump",
"--host",
MONGO_HOST,
"--port",
MONGO_PORT,
"--db",
MONGO_DB_NAME,
"--out",
backup_path,
]
# 添加认证参数
cmd.extend(get_auth_args())
try:
# 3. 执行命令
result = subprocess.run(cmd, check=True, text=True, capture_output=True)
print(f"[+] 备份成功!")
print(f" 存储路径: {backup_path}")
print(f" 日志: {result.stderr}") # mongodump 通常把进度输出到 stderr
return backup_path
except subprocess.CalledProcessError as e:
print(f"[-] 备份失败: {e}")
print(f" 错误信息: {e.stderr}")
return None
def restore_mongo(backup_source_path):
"""
执行恢复操作
backup_source_path: 备份文件夹的路径 (例如 ./mongo_backups/2023-10-27_10-00-00)
"""
# mongodump 的输出结构通常是: backup_dir/db_name/collection.bson
# 所以我们需要指向具体的数据库文件夹,或者指向父文件夹并指定 --db
target_dir = os.path.join(backup_source_path, MONGO_DB_NAME)
if not os.path.exists(target_dir):
print(
f"[-] 错误: 在路径 {backup_source_path} 下找不到数据库 {MONGO_DB_NAME} 的备份文件。"
)
return
print(f"[*] 开始恢复数据库: {MONGO_DB_NAME}{target_dir} ...")
# 构建 mongorestore 命令
# 命令格式: mongorestore --host <host> --port <port> --db <db> <path_to_bson_files> [auth]
cmd = [
"mongorestore",
"--host",
MONGO_HOST,
"--port",
MONGO_PORT,
"--db",
MONGO_DB_NAME,
"--drop", # 警告:这会在恢复前删除现有集合,确保数据干净。根据需要移除此项。
target_dir,
]
cmd.extend(get_auth_args())
try:
result = subprocess.run(cmd, check=True, text=True, capture_output=True)
print(f"[+] 恢复成功!")
print(f" 日志: {result.stderr}")
except subprocess.CalledProcessError as e:
print(f"[-] 恢复失败: {e}")
print(f" 错误信息: {e.stderr}")
# ================= 主程序入口 =================
if __name__ == "__main__":
print("请选择操作:")
print("1. 备份数据库 (Backup)")
print("2. 恢复数据库 (Restore)")
choice = input("请输入数字 (1/2): ").strip()
if choice == "1":
backup_mongo()
elif choice == "2":
# 列出所有备份供用户选择
if not os.path.exists(BACKUP_DIR_ROOT):
print("[-] 没有找到备份目录。")
else:
backups = sorted(os.listdir(BACKUP_DIR_ROOT))
if not backups:
print("[-] 目录为空。")
else:
print("\n可用备份:")
for idx, name in enumerate(backups):
print(f"{idx + 1}. {name}")
try:
idx_choice = int(input("\n请选择要恢复的备份编号: ")) - 1
if 0 <= idx_choice < len(backups):
selected_backup = os.path.join(
BACKUP_DIR_ROOT, backups[idx_choice]
)
restore_mongo(selected_backup)
else:
print("[-] 无效的选择。")
except ValueError:
print("[-] 请输入数字。")
else:
print("[-] 无效输入,退出。")
+6 -6
View File
@@ -14,13 +14,13 @@ from models.jsdata_pojo import JsDataPojo
from models.result_pojo import RequestResult
from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \
retain_only_dataome_cookies
from workers.proxy_constants import PROXY_TIMEOUT_S
API_KEY = "d66aaf490d8aa424a5175e1fbd1aadea"
HOST_ADDRESS = "https://api.2captcha.com/createTask"
HERMES_REGISTER = "https://rendezvousparis.hermes.com/client/register"
class CaptchaResultGetter:
def __init__(self):
@@ -51,7 +51,7 @@ class CaptchaResultGetter:
print(proxy_to_use)
try:
response = requests.get(url=HERMES_REGISTER, headers=headers, verify=False, proxies=proxy_to_use,
timeout=15)
timeout=PROXY_TIMEOUT_S)
print(response.status_code)
if response.status_code == 200:
print(response.text)
@@ -94,7 +94,7 @@ class CaptchaResultGetter:
raw_data = self.get_le_type_raw_data(old_valid_cookie=old_valid_cookie, js_le_type_data=js_le_type_data)
response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False,
data=raw_data,
proxies=proxy_to_use, timeout=15)
proxies=proxy_to_use, timeout=PROXY_TIMEOUT_S)
# print(response.status_code)
if response.status_code == 200:
print(response.text)
@@ -109,7 +109,7 @@ class CaptchaResultGetter:
return None
def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str:
_tag_version = "5.1.9"
_tag_version = "5.1.12"
_jspl = encrpte_to_jspl(js_data.to_url_encoded_json())
_raw_data = "jspl={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format(
_jspl, old_valid_cookie, _tag_version)
@@ -149,7 +149,7 @@ class CaptchaResultGetter:
response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False,
data=self.get_ch_raw_data_from_js_data(js_data=js_data,
old_valid_cookie=_cookies_to_use),
proxies=proxy_to_use, timeout=15)
proxies=proxy_to_use, timeout=PROXY_TIMEOUT_S)
print(response.status_code)
if response.status_code == 200:
print(response.text)
@@ -180,7 +180,7 @@ class CaptchaResultGetter:
# old_valid_cookie=old_valid_cookie)
_cid = get_datadome_cookies(old_valid_cookie)
_jspl = encrpte_to_jspl(js_le_type_data.to_url_encoded_json())
_raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.9".format(
_raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=5.1.12".format(
_jspl, mousemove_count, click_count, scroll_count, touch_count, touch_count,
touch_move,
key_count,
+1
View File
@@ -0,0 +1 @@
PROXY_TIMEOUT_S = 20
+2 -1
View File
@@ -13,6 +13,7 @@ from models.result_pojo import RequestResult
from queue_message.CookiesPublisher import CookiesPublisher
from utils.address_ip import get_address_ip
from workers.captcha_result_getter import CaptchaResultGetter
from workers.proxy_constants import PROXY_TIMEOUT_S
def get_chrome_version_from_ua(ua):
@@ -206,7 +207,7 @@ class Sender:
proxy_to_use = self.proxy_to_use
# print(proxy_to_use)
response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data,
timeout=15, allow_redirects=False, impersonate="chrome99_android")
timeout=PROXY_TIMEOUT_S, allow_redirects=False, impersonate="chrome99_android")
self.logger.info(response.status_code)
if response.status_code == 302:
# add to mongodb