Compare commits

..

1 Commits

Author SHA1 Message Date
panleicim c6ac87bdfa support model and wait only for 30s for cookie used by csrf 2025-07-10 19:58:31 +02:00
6 changed files with 71 additions and 49 deletions
+3 -1
View File
@@ -83,7 +83,7 @@ class MongoDbManager:
result_list.append(ContactPojo.from_firestore_dict(document))
return result_list
def save_links_to_validate(self, link: str, mail_address: str, _all_contact_list: list):
def save_links_to_validate(self, link: str, mail_address: str, model: str, _all_contact_list: list):
collection_to_use = self.db[LINKS_TO_VALIDATE]
updated_at = time.strftime("%H:%M:%S", time.localtime())
_ip_country = "FR"
@@ -98,6 +98,7 @@ class MongoDbManager:
u'url': link,
u'email': mail_address,
u'serial': serial,
u'model': model,
u'ip_country': _ip_country,
"updated_at": updated_at
},
@@ -106,6 +107,7 @@ class MongoDbManager:
collection_to_use.replace_one(filter={'_id': link, }, replacement={
u'url': link,
u'serial': serial,
u'model': model,
u'ip_country': _ip_country,
"updated_at": updated_at
},
+47 -41
View File
@@ -6,11 +6,13 @@ from builtins import list
from concurrent.futures import ThreadPoolExecutor
from email.header import decode_header
from email.message import Message
from typing import Union
from imapclient import IMAPClient
from db.mongo_manager import MONGO_STORE_MANAGER
from excel_reader import read_contacts
from mail.mail_constants import DOMAIN_HOTMAIL, create_imap
from models.ReserveResultPojo import ReserveResultPojo
from models.mail_pojo import MailPojo, MailAddress
VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous'
@@ -204,21 +206,31 @@ class MailReader():
return mail_messages
def need_to_valid_url(url: str, successful_items) -> bool:
# return True
# if len(successful_items) == 0:
# return False
#
# Find the ReserveResultPojo object from persisted items of DB
#
def find_item_by_url(url: str, successful_items) -> Union[None, ReserveResultPojo]:
    """Return the persisted item whose ``id`` matches the id embedded in *url*.

    The id is taken from the sixth ``/``-separated segment of the URL
    (index 5) and must be exactly 6 characters long to be considered valid.

    :param url: validation link extracted from a mail body.
    :param successful_items: iterable of objects exposing an ``id`` attribute;
        ``None`` is treated as an empty collection.
    :return: the first matching item, or ``None`` when the URL is too short,
        the id has the wrong length, or no item matches.
    """
    print("url is :" + url)
    parts = url.split('/')
    # A URL with fewer than six segments has no id segment at all; report
    # "not found" instead of raising IndexError.
    if len(parts) < 6:
        return None
    _id = parts[5]
    if len(_id) != 6:
        return None
    for item in successful_items or ():
        if item.id == _id:
            return item
    return None
def need_to_valid_url(url: str, item: Union[ReserveResultPojo, None]) -> bool:
print("url is :" + url)
parts = url.split('/')
id = parts[5]
if len(id) == 6:
for item in successful_items:
if item.id == id:
if item.url_validated is not None:
return not item.url_validated
else:
# if url_validated is None
return True
if item:
if item.url_validated is not None:
return not item.url_validated
else:
# if url_validated is None
return True
return True
else:
print("id not valid:{}".format(id))
@@ -242,43 +254,37 @@ def need_to_check_email(mail: str, successful_items) -> bool:
def find_links_to_validate_from_mail_list(mail_list: list, logger):
    """Read the given mailboxes and persist every validation link that still needs a click.

    For each mailbox accepted by ``need_to_check_email`` a ``MailReader`` is
    run on a worker thread and appends its messages to a shared list. Each
    collected message body is then searched with ``VALIDATION_URL_REGEX``;
    links accepted by ``need_to_valid_url`` are stored through
    ``MONGO_STORE_MANAGER.save_links_to_validate``.

    :param mail_list: objects exposing ``mail`` and ``password``; nothing is
        done when the list is empty or ``None``.
    :param logger: logger used to report the decision taken for each link.
    """
    if not mail_list:
        return

    contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list()
    successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
    mails_messages = []
    # One worker per mailbox; leaving the ``with`` block waits for every
    # submitted read to finish before the collected messages are inspected.
    with ThreadPoolExecutor(max_workers=len(mail_list)) as executor:
        for mail in mail_list:
            # check whether we need to read mail
            if need_to_check_email(mail.mail, successful_items):
                mail_reader = MailReader(mail.mail, mail.password)
                executor.submit(mail_reader.read_emails, mails_messages)

    # Reload the items after the mailboxes have been read.
    _refreshed_successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
    for mail in mails_messages:
        match = re.search(VALIDATION_URL_REGEX, mail.body)
        if not match:
            continue
        url = match.group(0)
        _item = find_item_by_url(url, _refreshed_successful_items)
        if need_to_valid_url(url, _item):
            logger.info("need to validate url: " + url)
            # The link can be accepted even when no persisted item matches it,
            # and an item may have been loaded without a ``model`` field, so
            # read the attribute defensively instead of dereferencing None.
            _model = getattr(_item, "model", None)
            MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, model=_model,
                                                       _all_contact_list=contact_to_book_list)
        else:
            # NOTE(review): messages are accessed via ``body``/``to_address``
            # above; confirm they also expose ``mail_address``.
            logger.info("do not need to click url --> {}".format(mail.mail_address))
if __name__ == '__main__':
# mail_address1 = MailAddress(mail="tinagonzales685585@aol.com", password="yhihvdkrbxnksema")
# mail_list = [mail_address1]
contact_to_book_list = read_contacts(
# file_name="/Users/rdv/Desktop/contact_list_not_used_contacts.xlsx")
# file_name="/Users/lpan/Desktop/contact_list_not_used_contacts.xlsx")
# file_name="/Users/rdv/Desktop/real_name_contacts_100_hotmail.xlsx")
file_name="/Users/rdv/Desktop/contact_list_2025-06-23.xlsx")
file_name="/Users/lpan/Desktop/contact_list_2025-06-27_gmx.xlsx")
# file_name="/Users/rdv/Desktop/contact_list_all_studio.xlsx")
# file_name="/Users/rdv/Desktop/contact_list_all_studo_gmx_us.xlsx")
# file_name="/Users/rdv/Desktop/contact_list_2025-05-24.xlsx")
+3
View File
@@ -107,6 +107,9 @@ class ReserveResultPojo:
if 'created_at' in source:
created_at = source['created_at']
result.created_at = created_at
if 'model' in source:
model = source['model']
result.model = model
if 'validated_at' in source:
validated_at = source['validated_at']
result.validated_at = validated_at
+14 -2
View File
@@ -3,6 +3,8 @@ import json
import random
import threading
import time
from http.cookies import SimpleCookie
from typing import Optional
import pika
@@ -62,6 +64,16 @@ def is_open():
return is_time_between(datetime.time(10, 30), datetime.time(19, 00))
def get_xsfr_token_from_cookies(cookies_str: str) -> Optional[str]:
    """Extract the value of the ``x-xsrf-token`` cookie from a cookie header string.

    :param cookies_str: raw cookie string such as ``"a=1; x-xsrf-token=abc"``.
        ``None`` or an empty string is treated as "no cookies".
    :return: the cookie value, or ``None`` when the cookie is absent.
    """
    # Guard first: SimpleCookie.load() treats any non-string argument as a
    # mapping, so a None payload would otherwise fail with AttributeError.
    if not cookies_str:
        return None
    _simple_cookies = SimpleCookie()
    _simple_cookies.load(cookies_str)
    _morsel = _simple_cookies.get("x-xsrf-token")
    return _morsel.value if _morsel is not None else None
class AppointmentRequestSender(threading.Thread):
def __init__(self, sub_contact_list: list, logger, cookiesPublisher: CookiesPublisher,
bakeUpCookiesPublisher: CookiesPublisher,
@@ -213,9 +225,9 @@ class AppointmentRequestSender(threading.Thread):
# If the request was blocked by the CSRF check, republish the cookie to the current queue, because this cookie may still be reusable
self.logger.info("csrf blocked, will republish cookie")
self.cookiesPublisher.publish_body(_received_object)
self.logger.info("csrf blocked, will wait 60 seconds")
time.sleep(60)
ch.basic_ack(delivery_tag=method.delivery_tag)
self.logger.info("csrf blocked, will wait 30 seconds")
time.sleep(30)
elif can_continue is not None and can_continue == RequestResult.BLOCKED:
self.logger.info("这个cookies可以给点链接用")
self.bakeUpCookiesPublisher.publish_body(_received_object)
+2 -3
View File
@@ -2,14 +2,13 @@ import json
import random
import re
from typing import Union
import requests
# import requests
from curl_cffi import requests
from models.jsdata_le_pojo import JsDataLeTypePojo
from models.jsdata_pojo import JsDataPojo
from models.result_pojo import RequestResult
from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \
retain_only_dataome_cookies
from workers.proxies_constants import PROXY_LIST_FR
API_KEY = "d66aaf490d8aa424a5175e1fbd1aadea"
+2 -2
View File
@@ -230,8 +230,8 @@ def validate_all_links(_contact_serial_list):
# default_segment_number = 20
_first_25_percent_links = link_to_validated[0:(int(len(all_link_list) / divided))]
_first_25_percent_links = all_link_list
_queue_name = MORNING_DATA_CACHE
# _queue_name = MORNING_DATA_CACHE_BAK
# _queue_name = MORNING_DATA_CACHE
_queue_name = MORNING_DATA_CACHE_BAK
# if len(all_link_list) > divided * default_segment_number:
# _segment_number = default_segment_number
# else: