From 53ea721cb6b78146cb05eaf965623a1609fd32f5 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Tue, 19 Mar 2024 13:51:44 +0100 Subject: [PATCH 01/22] can get le type jsdata --- models/jsdata_le_pojo.py | 60 +++++++++++ models/jsdata_pojo.py | 2 +- queue_message/CookiesPublisher.py | 1 + queue_message/appointmentrequestsender.py | 82 +++++++------- queue_message/link_validator_receiver.py | 28 ----- request_sender.py | 16 +-- utils/get_only_datadome_cookies.py | 70 ++++++++++++ workers/captcha_result_getter.py | 126 +++++++++------------- workers/proxies_constants.py | 9 +- workers/sender.py | 8 +- 10 files changed, 245 insertions(+), 157 deletions(-) create mode 100644 models/jsdata_le_pojo.py create mode 100644 utils/get_only_datadome_cookies.py diff --git a/models/jsdata_le_pojo.py b/models/jsdata_le_pojo.py new file mode 100644 index 0000000..52a3a82 --- /dev/null +++ b/models/jsdata_le_pojo.py @@ -0,0 +1,60 @@ +import json + +from models.jsdata_pojo import JsDataPojo + + +# "log2": true, +# "dcok": ".hermes.com", +# "m_fmi": false, +# "tbce": 0, +# "es_sigmdn": null, +# "es_mumdn": null, +# "es_distmdn": null, +# "es_angsmdn": null, +# "es_angemdn": null, +# "m_s_c": 0, +# "m_m_c": 3, +# "m_c_c": 3, +# "m_cm_r": 1, +# "m_ms_r": -1 +class JsDataLeTypePojo(JsDataPojo): + + def __init__(self, glrd, glvd, hc, br_oh, ua, br_ow, br_h, br_w, rs_h, rs_w, rs_cd, ars_h, ars_w, plg, eva, plu, + vnd, pr, ts_mtp, dvm, m_s_c, m_m_c, m_c_c, m_cm_r, m_ms_r): + super().__init__(glrd, glvd, hc, br_oh, ua, br_ow, br_h, br_w, rs_h, rs_w, rs_cd, ars_h, ars_w, plg, eva, plu, + vnd, pr, ts_mtp, dvm) + self.log2 = True + self.dcok = ".hermes.com" + self.m_fmi = False + self.tbce = 0 + self.es_sigmdn = None + self.es_mumdn = None + self.es_distmdn = None + self.es_angsmdn = None + self.es_angemdn = None + self.m_s_c = m_s_c + self.m_m_c = m_m_c + self.m_c_c = m_c_c + self.m_cm_r = m_cm_r + self.m_ms_r = m_ms_r + + +test_data_json = """{"glvd": "Qualcomm", "glrd": "Adreno (TM) 630", "hc": 2, "br_oh": 663, "br_ow": 384, "br_h": 663, + "br_w": 384, "rs_h": 811, "rs_w": 384, "rs_cd": 24, "ars_h": 811, "ars_w": 384, "plg": 2, "eva": 33, + "vnd": "Google Inc.", "plu": ["5gQIEKN", "Iv26GLka"], + "ua": "Mozilla/5.0 (Linux; Android 11) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36", + "dvm": 4, "acw": "probably", "pr": 2.8125, "ts_mtp": 5, + "cookiesStr": "app.sig=Arp0SzXCTBF2ordHf3pPTd6cS14;datadome=o_7W8TjHcQjqvnFjFBGAZZ0OmNOd0fnJXfzgqOajAHBBNv7zNMFi2~Jn7wQbTEIZdr2qvg0IFFQQr~Zk5ifzM154V1XxZVhgDZeVkNyRKrQewLBIzVeSagWs587fBqdX;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoieVVRWmJBWnlRTnQ5bFlNbDdNN3R6eHhqIn0=;policy=accepted;lang=fr;"}""" + +if __name__ == '__main__': + test_data_dict = json.loads(test_data_json) + le_js_data = JsDataLeTypePojo(glrd=test_data_dict['glrd'], glvd=test_data_dict['glvd'], hc=test_data_dict['hc'], + ua=test_data_dict['ua'], br_oh=test_data_dict['br_oh'], br_ow=test_data_dict['br_ow'], + ars_h=test_data_dict['ars_h'], ars_w=test_data_dict['ars_w'], pr=test_data_dict['pr'], + plg=test_data_dict['plg'], br_h=test_data_dict['br_h'], br_w=test_data_dict['br_w'], + plu=test_data_dict['plu'], vnd=test_data_dict['vnd'], dvm=test_data_dict['dvm'], + ts_mtp=test_data_dict['ts_mtp'], eva=test_data_dict['eva'], + rs_h=test_data_dict['rs_h'], + rs_w=test_data_dict['rs_w'], rs_cd=test_data_dict['rs_cd'], m_s_c=0, m_m_c=3, m_c_c=3, + m_cm_r=1, m_ms_r=-1) + print(le_js_data.to_url_encoded_json()) diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 3eb86be..8f90fca 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -138,7 +138,7 @@ class JsDataPojo: def __init__(self, glrd, glvd, hc, br_oh, ua, br_ow, br_h, br_w, rs_h, rs_w, rs_cd, ars_h, ars_w, plg, eva, plu, vnd, pr, ts_mtp, dvm): self.opts = "endpoint,ajaxListenerPath" - self.ttst = random.randint(38, 94) + self.ttst = random.randint(38, 148) self.ifov = False self.hc = hc self.br_oh = br_oh diff --git a/queue_message/CookiesPublisher.py b/queue_message/CookiesPublisher.py index 7d7a280..7162eb5 100644 --- a/queue_message/CookiesPublisher.py +++ b/queue_message/CookiesPublisher.py @@ -7,6 +7,7 @@ REQUEST_DATA_QUEUE_TEST = 'REQUEST_DATA_TEST' TEST_QUEUE = 'TEST_QUEUE' REQUEST_DATA_QUEUE_DE = 'REQUEST_DATA_DE' REQUEST_DATA_OBJECT = 'REQUEST_DATA_OBJECT' +REGISTER_QUEUE = 'REGISTER_QUEUE' credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 78ef91b..7d2ed7d 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -3,12 +3,12 @@ import json import random import threading import time -from http.cookies import SimpleCookie import pika from db.mongo_manager import MONGO_STORE_MANAGER from models.contact_pojo import ContactPojo +from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_pojo import JsDataPojo from models.result_pojo import RequestResult from queue_message.CookiesPublisher import CookiesPublisher @@ -43,34 +43,6 @@ def filter_contacts(_contact_list: list) -> list: return _contact_list_to_book -def get_valid_csrf() -> str: - captchaResultGetter = CaptchaResultGetter() - _valid_cookie = captchaResultGetter.get_valid_cookie() - # while _valid_cookie is None: - # _valid_cookie = captchaResultGetter.get_valid_cookie() - new_csrf = None - while new_csrf is None: - valid_cookie = None - if _valid_cookie is not None: - simple_cookie = SimpleCookie() - simple_cookie.load(_valid_cookie) - new_cookies = {k: v.value for k, v in simple_cookie.items()} - new_coolies_str = "" - for key in new_cookies: - print(key) - new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - print(new_coolies_str) - valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" - print(valid_cookie) - new_csrf = captchaResultGetter.get_csrf(valid_cookie) - if new_csrf is None: - _valid_cookie = None - while _valid_cookie is None: - _valid_cookie = captchaResultGetter.get_valid_cookie() - time.sleep(2) - return new_csrf - - def is_open(): return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) @@ -103,6 +75,12 @@ class AppointmentRequestSender(threading.Thread): print("message count in queue is {}".format(_message_count)) _received_object = body.decode("UTF-8") print(f" [x] Received {_received_object}") + _port = random.randint(40001, 49999) + print("generated port is {}".format(_port)) + _choosed_proxy = random.choice(self.proxy_to_use_list) + _proxy_to_use = {} + _proxy_to_use["http"] = _choosed_proxy["http"].format(_port) + _proxy_to_use["https"] = _choosed_proxy["https"].format(_port) if "glrd" in _received_object: _received_dict = json.loads(_received_object) js_data = JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], @@ -115,7 +93,7 @@ class AppointmentRequestSender(threading.Thread): rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd']) _received_cookies = _received_dict["cookiesStr"] sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, - proxy_to_use=random.choice(self.proxy_to_use_list)) + proxy_to_use=_proxy_to_use) self.contact_list = filter_contacts(self.contact_list) # remove already booked contacts random.shuffle(self.contact_list) @@ -124,20 +102,48 @@ class AppointmentRequestSender(threading.Thread): print("contact number is {}".format(len(self.contact_list))) self.contact_list = filter_contacts(self.contact_list) for con in self.contact_list: - # if not is_already_sent(con): print(con.mail) - # time.sleep(random.randint(1, 5)) if self.valid_csrf is None: self.valid_csrf = captchaResultGetter.get_csrf( - proxy_to_use=random.choice(self.proxy_to_use_list), - cookie=body.decode("UTF-8")) + proxy_to_use=_proxy_to_use, js_data=js_data, + cookie=_received_cookies) _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, old_valid_cookie=_received_cookies) - if _received_cookies is not None: - print("new cookie is " + _received_cookies) - sender.cookie_str = _received_cookies + if _new_cookies is not None: + print("new cookie is " + _new_cookies) time.sleep(random.randint(1, 5)) - can_continue = sender.send_request(HERMES_REGISTER, con, csrf=self.valid_csrf) + # m_s_c = f.scroll + m_s_c = random.randint(0, 3) + m_c_c = random.randint(3, 5) # click count + m_m_c = random.randint(3, 5) # move count + m_cm_r = m_c_c / m_m_c + m_ms_r = random.randint(-1, 1) + + js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], + hc=_received_dict['hc'], + ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], + br_ow=_received_dict['br_ow'], + ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], + pr=_received_dict['pr'], + plg=_received_dict['plg'], br_h=_received_dict['br_h'], + br_w=_received_dict['br_w'], + plu=_received_dict['plu'], vnd=_received_dict['vnd'], + dvm=_received_dict['dvm'], + ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], + rs_h=_received_dict['rs_h'], + rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], + m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, + m_cm_r=m_cm_r, m_ms_r=m_ms_r) + _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, + js_le_type_data=js_le_data, + old_valid_cookie=_new_cookies) + if _new_le_cookies is not None: + print("new le type cookie is " + _new_le_cookies) + sender.cookie_str = _new_le_cookies + time.sleep(random.randint(1, 5)) + can_continue = sender.send_request(HERMES_REGISTER, js_data, con, csrf=self.valid_csrf) + else: + can_continue = RequestResult.COOKIES_ERROR else: can_continue = RequestResult.COOKIES_ERROR if can_continue == RequestResult.BLOCKED: diff --git a/queue_message/link_validator_receiver.py b/queue_message/link_validator_receiver.py index 66abdef..2df1b5a 100644 --- a/queue_message/link_validator_receiver.py +++ b/queue_message/link_validator_receiver.py @@ -38,34 +38,6 @@ def filter_contacts(_contact_list: list) -> list: return _contact_list_to_book -def get_valid_csrf() -> str: - captchaResultGetter = CaptchaResultGetter() - _valid_cookie = captchaResultGetter.get_valid_cookie() - # while _valid_cookie is None: - # _valid_cookie = captchaResultGetter.get_valid_cookie() - new_csrf = None - while new_csrf is None: - valid_cookie = None - if _valid_cookie is not None: - simple_cookie = SimpleCookie() - simple_cookie.load(_valid_cookie) - new_cookies = {k: v.value for k, v in simple_cookie.items()} - new_coolies_str = "" - for key in new_cookies: - print(key) - new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - print(new_coolies_str) - valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" - print(valid_cookie) - new_csrf = captchaResultGetter.get_csrf(valid_cookie) - if new_csrf is None: - _valid_cookie = None - while _valid_cookie is None: - _valid_cookie = captchaResultGetter.get_valid_cookie() - time.sleep(2) - return new_csrf - - class LinkValidatorReceiver(threading.Thread): def __init__(self, linkpojo_list: list): self.connection = None diff --git a/request_sender.py b/request_sender.py index 43847b7..497b151 100644 --- a/request_sender.py +++ b/request_sender.py @@ -8,11 +8,11 @@ from db.mongo_manager import MONGO_STORE_MANAGER from excel_reader import read_contacts from models.contact_pojo import ContactPojo from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_DE, REQUEST_DATA_QUEUE_TEST, \ - REQUEST_DATA_OBJECT, TEST_QUEUE + REQUEST_DATA_OBJECT, TEST_QUEUE, REGISTER_QUEUE from queue_message.appointmentrequestsender import AppointmentRequestSender, REQUEST_DATA_QUEUE from utiles import is_time_between from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER -from workers.proxies_constants import PROXY_LIST_DE, PROXY_LIST_FR, MOBILE_PROXY_LIST_FR +from workers.proxies_constants import PROXY_LIST_DE, PROXY_LIST_FR, MOBILE_PROXY_LIST_FR, FR_PROXY_MOB_OXY_STICKY from workers.sender import Sender IPFIY = 'http://api.ipify.org' @@ -89,14 +89,16 @@ def send_appointment_request(message_queue_name, _contact_list): if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-09.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-15.xlsx' _contact_list = read_contacts(contacts_file_path) - _segment_number = 10 + _contact_list_to_book = filter_contacts(_contact_list) + _segment_number = 5 + print("{} contacts to book".format(len(_contact_list_to_book))) for i in range(0, _segment_number): print(i) - _step = int(len(_contact_list) / _segment_number) - _sublist = _contact_list[i:_step * (i + 1)] + _step = int(len(_contact_list_to_book) / _segment_number) + _sublist = _contact_list_to_book[i:_step * (i + 1)] print(_sublist[0]) print(_sublist[-1]) - _thread1 = Thread(target=send_appointment_request, args=(REQUEST_DATA_OBJECT, _sublist)) + _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) _thread1.start() diff --git a/utils/get_only_datadome_cookies.py b/utils/get_only_datadome_cookies.py new file mode 100644 index 0000000..ec87b98 --- /dev/null +++ b/utils/get_only_datadome_cookies.py @@ -0,0 +1,70 @@ +from http.cookies import SimpleCookie + + +def get_datadome_cookies(cookiesStr: str): + _cookies = SimpleCookie() + _cookies.load(cookiesStr) + new_cookies = {k: v.value for k, v in _cookies.items()} + _new_coolies_str = None + for key in new_cookies: + if key == "datadome": + if _new_coolies_str is None: + _new_coolies_str = "" + _new_coolies_str = _new_coolies_str + new_cookies[key] + return _new_coolies_str + + +def retain_only_dataome_cookies(cookiesStr: str): + _cookies = SimpleCookie() + _cookies.load(cookiesStr) + new_cookies = {k: v.value for k, v in _cookies.items()} + new_coolies_str = None + for key in new_cookies: + if key == "datadome": + if new_coolies_str is None: + new_coolies_str = "" + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + return new_coolies_str + + +def get_app_cookies(cookiesStr: str): + _cookies = SimpleCookie() + _cookies.load(cookiesStr) + new_cookies = {k: v.value for k, v in _cookies.items()} + new_coolies_str = None + for key in new_cookies: + if key == "app.sig": + if new_coolies_str is None: + new_coolies_str = "" + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + return new_coolies_str + + +def get_lang_cookies(cookiesStr: str): + _cookies = SimpleCookie() + _cookies.load(cookiesStr) + new_cookies = {k: v.value for k, v in _cookies.items()} + new_coolies_str = None + for key in new_cookies: + if key == "app": + if new_coolies_str is None: + new_coolies_str = "" + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + if key == "policy": + if new_coolies_str is None: + new_coolies_str = "" + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + if key == "lang": + if new_coolies_str is None: + new_coolies_str = "" + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + return new_coolies_str + + +if __name__ == '__main__': + print(get_datadome_cookies( + "datadome=HS7HPdbsFTYfxD60ogUwxjplOuqrveIjcTaucXVIWCko6pRayhXuKxC~44Ky5eNR2wnx4PEd2P2AadewmeXgxi4vn3x_QKdxLb6H1a9MbaMLNwmxfMEjQQ~CK8Y_nspJ;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiNXhvX3ZFaWc0aURla0dYMHZGTHNJX2p4In0=;policy=accepted;lang=fr;")) + print(get_app_cookies( + "datadome=HS7HPdbsFTYfxD60ogUwxjplOuqrveIjcTaucXVIWCko6pRayhXuKxC~44Ky5eNR2wnx4PEd2P2AadewmeXgxi4vn3x_QKdxLb6H1a9MbaMLNwmxfMEjQQ~CK8Y_nspJ;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiNXhvX3ZFaWc0aURla0dYMHZGTHNJX2p4In0=;policy=accepted;lang=fr;")) + print(get_lang_cookies( + "datadome=HS7HPdbsFTYfxD60ogUwxjplOuqrveIjcTaucXVIWCko6pRayhXuKxC~44Ky5eNR2wnx4PEd2P2AadewmeXgxi4vn3x_QKdxLb6H1a9MbaMLNwmxfMEjQQ~CK8Y_nspJ;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiNXhvX3ZFaWc0aURla0dYMHZGTHNJX2p4In0=;policy=accepted;lang=fr;")) diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index 3579d72..9248179 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -1,11 +1,13 @@ import json import random import re -from time import time from typing import Union import requests +from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_pojo import JsDataPojo +from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \ + retain_only_dataome_cookies from workers.proxies_constants import PROXY_LIST_FR API_KEY = "d66aaf490d8aa424a5175e1fbd1aadea" @@ -22,10 +24,10 @@ class CaptchaResultGetter: self.cookie_str = 'datadome=5Nq~NEP_qQSHC0g_lZNnZmEv36J8gVV~rpZ329xmCkTq2~H3meIoXr4h_b988qB2XW5Te7iEGsvq8BzA5KeFupyrZFh4kgrDyl8hT2UymSByKHzAcDaNIBPDsRu2g_KG; Max-Age=31536000; Domain=.hermes.com; Path=/; Secure; SameSite=None' pass - def get_csrf(self, proxy_to_use, cookie: str = None) -> Union[str, None]: + def get_csrf(self, proxy_to_use, js_data: JsDataPojo, cookie: str = None) -> Union[str, None]: if cookie is not None: headers = {'Content-Type': 'application/x-www-form-urlencoded', - 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'User-Agent': js_data.ua, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', 'Referer': 'https://www.google.fr', 'Cookie': cookie, @@ -34,7 +36,7 @@ class CaptchaResultGetter: 'Accept-Language': 'fr-FR,fr;q=0.6'} else: headers = {'Content-Type': 'application/x-www-form-urlencoded', - 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'User-Agent': js_data.ua, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', 'Referer': 'https://www.google.fr', 'Sec-Fetch-Mode': 'navigate', @@ -63,9 +65,10 @@ class CaptchaResultGetter: return result_list[-1] return None - def get_valid_cookie(self, proxy_to_use, old_valid_cookie: str, dvm=4, hc=6) -> Union[str, None]: + def get_le_valid_cookie(self, proxy_to_use, js_le_type_data: JsDataLeTypePojo, old_valid_cookie: str) -> Union[ + str, None]: headers = {'content-Type': 'application/x-www-form-urlencoded', - 'user-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'user-Agent': js_le_type_data.ua, 'accept': '*/*', 'referer': 'https://rendezvousparis.hermes.com/', 'origin': 'https://rendezvousparis.hermes.com/', @@ -76,16 +79,15 @@ class CaptchaResultGetter: 'sec-ch-ua-platform': 'Android', 'accept-encoding': 'gzip, deflate, br', 'accept-language': 'fr-FR,fr;q=0.6'} - print("send request to get new cookie") + print("send request to get le type cookie") print(proxy_to_use) print(headers) try: - # tag_pu = 10 * Math.random() - raw_data = self.get_raw_data(old_valid_cookie=old_valid_cookie, dvm=dvm, hc=hc) + raw_data = self.get_le_type_raw_data(old_valid_cookie=old_valid_cookie, js_le_type_data=js_le_type_data) response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False, data=raw_data, proxies=proxy_to_use, timeout=15) - print(response.status_code) + # print(response.status_code) if response.status_code == 200: print(response.text) returned_result = json.loads(response.text) @@ -99,20 +101,32 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "4.24.1" + _tag_version = "4.24.2" _raw_data = "jsData={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( js_data.to_url_encoded_json(), old_valid_cookie, _tag_version) print("raw data is " + _raw_data) return _raw_data def get_valid_ch_cookie(self, proxy_to_use, js_data: JsDataPojo, old_valid_cookie: str = None) -> Union[str, None]: + # extract datadome cookie + # get new cookie + _cookies_to_use = get_datadome_cookies(old_valid_cookie) + _app_cookies = get_app_cookies(old_valid_cookie) + _lang_cookies = get_lang_cookies(old_valid_cookie) + _origin = "https://rendezvousparis.hermes.com/" + # _origin = "https://www.hermes.com" + _referer = "https://rendezvousparis.hermes.com/" + # _referer = "https://www.hermes.com" + headers = {'content-Type': 'application/x-www-form-urlencoded', 'user-Agent': js_data.ua, 'accept': '*/*', - 'referer': 'https://rendezvousparis.hermes.com/', - 'origin': 'https://rendezvousparis.hermes.com/', + 'referer': _referer, + 'origin': _origin, + 'request': _origin, 'Sec-Fetch-Mode': 'cors', 'sec-fetch-site': 'cross-site', + 'sec-ch-ua-mobile': '?1', 'sec-gpc': "1", 'sec-fetch-dest': 'empty', 'accept-encoding': 'gzip, deflate, br', @@ -121,10 +135,9 @@ class CaptchaResultGetter: print("send request to get new cookie") print(headers) try: - # tag_pu = 10 * Math.random() response = requests.post(url="https://d.digital.hermes/js/", headers=headers, verify=False, data=self.get_ch_raw_data_from_js_data(js_data=js_data, - old_valid_cookie=old_valid_cookie), + old_valid_cookie=_cookies_to_use), proxies=proxy_to_use, timeout=15) print(response.status_code) if response.status_code == 200: @@ -132,6 +145,9 @@ class CaptchaResultGetter: returned_result = json.loads(response.text) new_cookie = returned_result['cookie'] print(response.url) + new_cookie = retain_only_dataome_cookies(new_cookie) + if _app_cookies is not None: + new_cookie = _app_cookies + new_cookie + _lang_cookies print(new_cookie) return new_cookie except Exception as error: @@ -139,27 +155,25 @@ class CaptchaResultGetter: print(error) return None - def get_raw_data(self, old_valid_cookie: str = None, dvm=4, hc=6): - _raw_data = None - - mousemove_count = random.randint(5, 10) - click_count = random.randint(5, 10) - click_count = mousemove_count - key_count = random.randint(5, 10) + def get_le_type_raw_data(self, old_valid_cookie: str, js_le_type_data: JsDataLeTypePojo): + # m_m_c = f.mousemove + mousemove_count = js_le_type_data.m_m_c + # m_c_c = f.click + click_count = js_le_type_data.m_c_c + # m_s_c = f.scroll + scroll_count = js_le_type_data.m_s_c + key_count = random.randint(5, 12) touch_count = mousemove_count - js_set = int(time()) - tag_pu = random.uniform(1, 10) - while len(str(tag_pu)) != 17: - tag_pu = random.uniform(1, 10) - - if old_valid_cookie is None: - _raw_data = "jsData=%7B%22log1%22%3Afalse%2C%22opts%22%3A%22endpoint%2CajaxListenerPath%22%2C%22ttst%22%3A38%2C%22ifov%22%3Afalse%2C%22hc%22%3A{}%2C%22br_oh%22%3A646%2C%22br_ow%22%3A360%2C%22ua%22%3A%22Mozilla%2F5.0%20(Linux%3B%20Android%2010%3B%20K)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F115.0.0.0%20Mobile%20Safari%2F537.36%22%2C%22wbd%22%3Afalse%2C%22tagpu%22%3A{}%2C%22wdif%22%3Afalse%2C%22wdifrm%22%3Afalse%2C%22npmtm%22%3Afalse%2C%22br_h%22%3A646%2C%22br_w%22%3A360%2C%22nddc%22%3A1%2C%22rs_h%22%3A800%2C%22rs_w%22%3A360%2C%22rs_cd%22%3A24%2C%22phe%22%3Afalse%2C%22nm%22%3Afalse%2C%22jsf%22%3Afalse%2C%22lg%22%3A%22fr-FR%22%2C%22pr%22%3A3%2C%22ars_h%22%3A800%2C%22ars_w%22%3A360%2C%22tz%22%3A-60%2C%22str_ss%22%3Atrue%2C%22str_ls%22%3Atrue%2C%22str_idb%22%3Atrue%2C%22str_odb%22%3Atrue%2C%22plgod%22%3Afalse%2C%22plg%22%3A2%2C%22plgne%22%3A%22err%22%2C%22plgre%22%3A%22err%22%2C%22plgof%22%3A%22err%22%2C%22plggt%22%3A%22err%22%2C%22pltod%22%3Afalse%2C%22hcovdr%22%3Afalse%2C%22hcovdr2%22%3Afalse%2C%22plovdr%22%3Afalse%2C%22plovdr2%22%3Afalse%2C%22ftsovdr%22%3Afalse%2C%22ftsovdr2%22%3Afalse%2C%22lb%22%3Afalse%2C%22eva%22%3A33%2C%22lo%22%3Afalse%2C%22ts_mtp%22%3A5%2C%22ts_tec%22%3Atrue%2C%22ts_tsa%22%3Atrue%2C%22vnd%22%3A%22Google%20Inc.%22%2C%22bid%22%3A%22NA%22%2C%22mmt%22%3A%22empty%22%2C%22plu%22%3A%226GLkaVK%2CjRvAfXq0%22%2C%22hdn%22%3Afalse%2C%22awe%22%3Afalse%2C%22geb%22%3Afalse%2C%22dat%22%3Afalse%2C%22med%22%3A%22defined%22%2C%22aco%22%3A%22probably%22%2C%22acots%22%3Afalse%2C%22acmp%22%3A%22probably%22%2C%22acmpts%22%3Atrue%2C%22acw%22%3A%22probably%22%2C%22acwts%22%3Afalse%2C%22acma%22%3A%22maybe%22%2C%22acmats%22%3Afalse%2C%22acaa%22%3A%22probably%22%2C%22acaats%22%3Atrue%2C%22ac3%22%3A%22%22%2C%22ac3ts%22%3Afalse%2C%22acf%22%3A%22probably%22%2C%22acfts%22%3Afalse%2C%22acmp4%22%3A%22maybe%22%2C%22acmp4ts%22%3Afalse%2C%22acmp3%22%3A%22probably%22%2C%22acmp3ts%22%3Afalse%2C%22acwm%22%3A%22maybe%22%2C%22acwmts%22%3Afalse%2C%22ocpt%22%3Afalse%2C%22vco%22%3A%22%22%2C%22vcots%22%3Afalse%2C%22vch%22%3A%22probably%22%2C%22vchts%22%3Atrue%2C%22vcw%22%3A%22probably%22%2C%22vcwts%22%3Atrue%2C%22vc3%22%3A%22maybe%22%2C%22vc3ts%22%3Afalse%2C%22vcmp%22%3A%22%22%2C%22vcmpts%22%3Afalse%2C%22vcq%22%3A%22%22%2C%22vcqts%22%3Afalse%2C%22vc1%22%3A%22probably%22%2C%22vc1ts%22%3Atrue%2C%22dvm%22%3A{}%2C%22sqt%22%3Afalse%2C%22so%22%3A%22portrait-primary%22%2C%22wdw%22%3Atrue%2C%22cokys%22%3A%22bG9hZFRpbWVzY3NpL%3D%22%2C%22ecpc%22%3Afalse%2C%22lgs%22%3Atrue%2C%22lgsod%22%3Afalse%2C%22psn%22%3Atrue%2C%22edp%22%3Afalse%2C%22addt%22%3Atrue%2C%22wsdc%22%3Atrue%2C%22ccsr%22%3Atrue%2C%22nuad%22%3Atrue%2C%22bcda%22%3Atrue%2C%22idn%22%3Atrue%2C%22capi%22%3Afalse%2C%22svde%22%3Afalse%2C%22vpbq%22%3Atrue%2C%22ucdv%22%3Afalse%2C%22spwn%22%3Afalse%2C%22emt%22%3Afalse%2C%22bfr%22%3Afalse%2C%22dbov%22%3Afalse%2C%22cfpfe%22%3A%22ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbJ1x4NzFceDc1XHg2NVx4NzJceDc5XHg1M1x4NjVceDZjXHg2NVx4NjNceDc0XHg2Zlx4NzInXShuKDQ3MCkpO2lmKG8peyFmdW5jdGlvbiB0KCl7dmFyIGk9bjt0cnl7dmFyIGE9b1snXHg3M1x4NjhceDYxXHg2%22%2C%22stcfp%22%3A%22Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6NzUyMzcpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWEgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6ODg0NDkpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUxMTY1%22%2C%22ckwa%22%3Atrue%2C%22glvd%22%3A%22Google%20Inc.%20(Qualcomm)%22%2C%22glrd%22%3A%22ANGLE%20(Qualcomm%2C%20Adreno%20(TM)%20650%2C%20OpenGL%20ES%203.2)%22%2C%22wwl%22%3Afalse%2C%22jset%22%3A{}%2C%22dcok%22%3A%22.hermes.com%22%2C%22log2%22%3Atrue%2C%22tbce%22%3A0%2C%22es_sigmdn%22%3Anull%2C%22es_mumdn%22%3Anull%2C%22es_distmdn%22%3Anull%2C%22es_angsmdn%22%3Anull%2C%22es_angemdn%22%3Anull%2C%22m_s_c%22%3A3%2C%22m_m_c%22%3A5%2C%22m_c_c%22%3A5%2C%22m_cm_r%22%3A1%2C%22m_ms_r%22%3A1.6666666666666667%7D&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A3%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A0%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid=S2ilUoO80StZ2OfedRZnvyMiJ9hmD_DyIWj11O2eak~8TOek7JTTeJeKWDLzGgMZaH2IOix3BX1ocB4TwnLQFx~76pAQAD0JnVZzoxwbxOPL2SR834BfbiK0Nu4c0Pxh&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.19.3".format( - hc, tag_pu, dvm, js_set, mousemove_count, click_count, touch_count, touch_count, key_count, key_count) - else: - _raw_data = "jsData=%7B%22log1%22%3Afalse%2C%22opts%22%3A%22endpoint%2CajaxListenerPath%22%2C%22ttst%22%3A38%2C%22ifov%22%3Afalse%2C%22hc%22%3A{}%2C%22br_oh%22%3A646%2C%22br_ow%22%3A360%2C%22ua%22%3A%22Mozilla%2F5.0%20(Linux%3B%20Android%2010%3B%20K)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20Chrome%2F115.0.0.0%20Mobile%20Safari%2F537.36%22%2C%22wbd%22%3Afalse%2C%22tagpu%22%3A{}%2C%22wdif%22%3Afalse%2C%22wdifrm%22%3Afalse%2C%22npmtm%22%3Afalse%2C%22br_h%22%3A646%2C%22br_w%22%3A360%2C%22nddc%22%3A1%2C%22rs_h%22%3A800%2C%22rs_w%22%3A360%2C%22rs_cd%22%3A24%2C%22phe%22%3Afalse%2C%22nm%22%3Afalse%2C%22jsf%22%3Afalse%2C%22lg%22%3A%22fr-FR%22%2C%22pr%22%3A3%2C%22ars_h%22%3A800%2C%22ars_w%22%3A360%2C%22tz%22%3A-60%2C%22str_ss%22%3Atrue%2C%22str_ls%22%3Atrue%2C%22str_idb%22%3Atrue%2C%22str_odb%22%3Atrue%2C%22plgod%22%3Afalse%2C%22plg%22%3A2%2C%22plgne%22%3A%22err%22%2C%22plgre%22%3A%22err%22%2C%22plgof%22%3A%22err%22%2C%22plggt%22%3A%22err%22%2C%22pltod%22%3Afalse%2C%22hcovdr%22%3Afalse%2C%22hcovdr2%22%3Afalse%2C%22plovdr%22%3Afalse%2C%22plovdr2%22%3Afalse%2C%22ftsovdr%22%3Afalse%2C%22ftsovdr2%22%3Afalse%2C%22lb%22%3Afalse%2C%22eva%22%3A33%2C%22lo%22%3Afalse%2C%22ts_mtp%22%3A5%2C%22ts_tec%22%3Atrue%2C%22ts_tsa%22%3Atrue%2C%22vnd%22%3A%22Google%20Inc.%22%2C%22bid%22%3A%22NA%22%2C%22mmt%22%3A%22empty%22%2C%22plu%22%3A%226GLkaVK%2CjRvAfXq0%22%2C%22hdn%22%3Afalse%2C%22awe%22%3Afalse%2C%22geb%22%3Afalse%2C%22dat%22%3Afalse%2C%22med%22%3A%22defined%22%2C%22aco%22%3A%22probably%22%2C%22acots%22%3Afalse%2C%22acmp%22%3A%22probably%22%2C%22acmpts%22%3Atrue%2C%22acw%22%3A%22probably%22%2C%22acwts%22%3Afalse%2C%22acma%22%3A%22maybe%22%2C%22acmats%22%3Afalse%2C%22acaa%22%3A%22probably%22%2C%22acaats%22%3Atrue%2C%22ac3%22%3A%22%22%2C%22ac3ts%22%3Afalse%2C%22acf%22%3A%22probably%22%2C%22acfts%22%3Afalse%2C%22acmp4%22%3A%22maybe%22%2C%22acmp4ts%22%3Afalse%2C%22acmp3%22%3A%22probably%22%2C%22acmp3ts%22%3Afalse%2C%22acwm%22%3A%22maybe%22%2C%22acwmts%22%3Afalse%2C%22ocpt%22%3Afalse%2C%22vco%22%3A%22%22%2C%22vcots%22%3Afalse%2C%22vch%22%3A%22probably%22%2C%22vchts%22%3Atrue%2C%22vcw%22%3A%22probably%22%2C%22vcwts%22%3Atrue%2C%22vc3%22%3A%22maybe%22%2C%22vc3ts%22%3Afalse%2C%22vcmp%22%3A%22%22%2C%22vcmpts%22%3Afalse%2C%22vcq%22%3A%22%22%2C%22vcqts%22%3Afalse%2C%22vc1%22%3A%22probably%22%2C%22vc1ts%22%3Atrue%2C%22dvm%22%3A{}%2C%22sqt%22%3Afalse%2C%22so%22%3A%22portrait-primary%22%2C%22wdw%22%3Atrue%2C%22cokys%22%3A%22bG9hZFRpbWVzY3NpL%3D%22%2C%22ecpc%22%3Afalse%2C%22lgs%22%3Atrue%2C%22lgsod%22%3Afalse%2C%22psn%22%3Atrue%2C%22edp%22%3Afalse%2C%22addt%22%3Atrue%2C%22wsdc%22%3Atrue%2C%22ccsr%22%3Atrue%2C%22nuad%22%3Atrue%2C%22bcda%22%3Atrue%2C%22idn%22%3Atrue%2C%22capi%22%3Afalse%2C%22svde%22%3Afalse%2C%22vpbq%22%3Atrue%2C%22ucdv%22%3Afalse%2C%22spwn%22%3Afalse%2C%22emt%22%3Afalse%2C%22bfr%22%3Afalse%2C%22dbov%22%3Afalse%2C%22cfpfe%22%3A%22ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbJ1x4NzFceDc1XHg2NVx4NzJceDc5XHg1M1x4NjVceDZjXHg2NVx4NjNceDc0XHg2Zlx4NzInXShuKDQ3MCkpO2lmKG8peyFmdW5jdGlvbiB0KCl7dmFyIGk9bjt0cnl7dmFyIGE9b1snXHg3M1x4NjhceDYxXHg2%22%2C%22stcfp%22%3A%22Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6NzUyMzcpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWEgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6ODg0NDkpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUxMTY1%22%2C%22ckwa%22%3Atrue%2C%22glvd%22%3A%22Google%20Inc.%20(Qualcomm)%22%2C%22glrd%22%3A%22ANGLE%20(Qualcomm%2C%20Adreno%20(TM)%20650%2C%20OpenGL%20ES%203.2)%22%2C%22wwl%22%3Afalse%2C%22jset%22%3A{}%2C%22dcok%22%3A%22.hermes.com%22%2C%22log2%22%3Atrue%2C%22tbce%22%3A0%2C%22es_sigmdn%22%3Anull%2C%22es_mumdn%22%3Anull%2C%22es_distmdn%22%3Anull%2C%22es_angsmdn%22%3Anull%2C%22es_angemdn%22%3Anull%2C%22m_s_c%22%3A3%2C%22m_m_c%22%3A5%2C%22m_c_c%22%3A5%2C%22m_cm_r%22%3A1%2C%22m_ms_r%22%3A1.6666666666666667%7D&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A3%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A0%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.19.3".format( - hc, tag_pu, dvm, js_set, mousemove_count, click_count, touch_count, touch_count, key_count, - key_count, old_valid_cookie) - print("raw data is " + _raw_data) + touch_move = touch_count + 2 + # _le_js_raw_data = self.get_le_raw_data_from_js_data(js_le_type_data=js_le_type_data, + # old_valid_cookie=old_valid_cookie) + _cid = get_datadome_cookies(old_valid_cookie) + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.2".format( + js_le_type_data.to_url_encoded_json(), mousemove_count, click_count, scroll_count, touch_count, touch_count, + touch_move, + key_count, + key_count, _cid) + print("raw le data is " + _raw_data) return _raw_data def extract_cid_from_html(self, html_text): @@ -182,48 +196,6 @@ class CaptchaResultGetter: # if __name__ == '__main__': - # print(int(time())) captchaResultGetter = CaptchaResultGetter() _url_encoded_data = captchaResultGetter.get_valid_cookie() print(_url_encoded_data) - - # print(urllib.parse.parse_qs((_url_encoded_data))) - # valid_cookie = captchaResultGetter.get_valid_cookie() - # simple_cookie = SimpleCookie() - # simple_cookie.load(valid_cookie) - # new_cookies = {k: v.value for k, v in simple_cookie.items()} - # new_coolies_str = "" - # for key in new_cookies: - # print(key) - # new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - # print(new_coolies_str) - # valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" - # print(valid_cookie) -# # csrf = captchaResultGetter.extract_csrf_from_html( -# # """strong>Français / English / 中文 / 日本語 / 한국어 / Русский

Bienvenue chez Hermès

Demande de rendez-vous pour le vendredi 12 janvier

""") -# # print(csrf) -# contact_list = read_contacts('/Users/panlei/Desktop/yahoo_aol_valid_26-2.xlsx') -# # remove already booked contacts -# sub_contact_list = filter_contacts(contact_list) -# random.shuffle(sub_contact_list) -# valid_cookie = captchaResultGetter.get_valid_cookie() -# contact = random.choice(sub_contact_list) -# if valid_cookie is not None: -# # new_csrf = captchaResultGetter.get_csrf(valid_cookie) -# new_csrf = None -# # if new_csrf is not None: -# # print(new_csrf) -# simple_cookie = SimpleCookie() -# simple_cookie.load(valid_cookie) -# new_cookies = {k: v.value for k, v in simple_cookie.items()} -# new_coolies_str = "" -# for key in new_cookies: -# print(key) -# new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" -# print(new_coolies_str) -# valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" -# print(valid_cookie) -# new_csrf = captchaResultGetter.get_csrf(valid_cookie) -# if new_csrf is not None: -# sender = Sender(valid_cookie) -# sender.send_request(HERMES_REGISTER, contact=contact, csrf=new_csrf) diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index 05788da..9e08c1b 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -42,12 +42,16 @@ FR_PROXY_RES_OXY = { 'https': 'http://customer-rendezvous-cc-FR:Rdv202220212023@pr.oxylabs.io:7777' } - FR_PROXY_MOB_OXY = { 'http': 'http://customer-rendezvousmob-cc-FR:Rdv202220212023@pr.oxylabs.io:7777', 'https': 'http://customer-rendezvousmob-cc-FR:Rdv202220212023@pr.oxylabs.io:7777' } +FR_PROXY_MOB_OXY_STICKY = { + 'http': 'http://customer-rendezvousmob-cc-FR:Rdv202220212023@fr-pr.oxylabs.io:{}', + 'https': 'http://customer-rendezvousmob-cc-FR:Rdv202220212023@fr-pr.oxylabs.io:{}' +} + DE_PROXY_MOBILE = { 'http': 'http://8153587-mobile-country-DE:jp50x9jmo@194.88.106.169:11842', 'https': 'http://8153587-mobile-country-DE:jp50x9jmo@194.88.106.169:11842' @@ -57,14 +61,13 @@ DE_PROXY_RES = { 'https': 'http://8254565-res-country-DE:1st57izbhe@185.21.60.181:14648' } - # FR_PROXY_MOBILE # FR_PROXY_RES # PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE] # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] PROXY_LIST_FR = [FR_PROXY_RES_OXY] -MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY] +MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY] # PROXY_LIST = [FR_PROXY_MOBILE, FR_ # PROXY_RES] PROXY_LIST_DE = [DE_PROXY_RES, DE_PROXY_MOBILE] diff --git a/workers/sender.py b/workers/sender.py index db276eb..e189a3f 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -8,6 +8,7 @@ import requests from db.mongo_manager import MONGO_STORE_MANAGER from models.ReserveResultPojo import ReserveResultPojo, PublishType +from models.jsdata_pojo import JsDataPojo from models.result_pojo import RequestResult from queue_message.CookiesPublisher import CookiesPublisher from workers.proxies_constants import PROXY_LIST_FR @@ -46,7 +47,7 @@ class Sender: def get_csrf(self): pass - def send_request(self, url, contact: ContactPojo, csrf: str = None) -> RequestResult: + def send_request(self, url, js_data: JsDataPojo, contact: ContactPojo, csrf: str = None) -> RequestResult: if csrf is None: if self._csrf is None: _csrf = '8Bs2dBwb-nHONOzo9Tei2CcMZglEfsRqUz8E' @@ -56,7 +57,7 @@ class Sender: self._csrf = csrf _csrf = csrf headers = {'Content-Type': 'application/x-www-form-urlencoded', - 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'User-Agent': js_data.ua, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', 'Cookie': self.cookie_str, 'Referer': 'https://rendezvousparis.hermes.com/client/register', 'Sec-Fetch-Mode': 'navigate', @@ -67,9 +68,9 @@ class Sender: 'phone_country': "FR", 'phone_number': "+33 " + contact.phone, 'email': contact.mail, 'passport_id': contact.passport, 'processing': 'on', 'cgu': 'on'} print(data) + print("send request with cookie:{}".format(self.cookie_str)) try: proxy_to_use = self.proxy_to_use - # proxy_to_use = PROXY_LIST[0] print(proxy_to_use) response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data, timeout=15) @@ -93,6 +94,7 @@ class Sender: # self.cookie_str = new_coolies_str return RequestResult.SUCCESS else: + print(response.text) return RequestResult.BLOCKED except Exception as Error: print(Error) From 40c7c7cf96f27b884cde317738b8ca8f68ea5821 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Wed, 20 Mar 2024 14:18:38 +0100 Subject: [PATCH 02/22] log to file --- queue_message/appointmentrequestsender.py | 49 +++++++++--------- request_sender.py | 63 +++++++---------------- utils/AppLogging.py | 13 +++++ workers/sender.py | 13 ++--- 4 files changed, 63 insertions(+), 75 deletions(-) create mode 100644 utils/AppLogging.py diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 7d2ed7d..7cdc2cc 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -48,10 +48,11 @@ def is_open(): class AppointmentRequestSender(threading.Thread): - def __init__(self, sub_contact_list: list, proxy_to_use_list, cookiesPublisher: CookiesPublisher, + def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher, queue_name=REQUEST_DATA_QUEUE): super().__init__() self.connection = None + self.logger = logger self.cookiesPublisher = cookiesPublisher self.channel = None self.valid_csrf = None @@ -65,22 +66,22 @@ class AppointmentRequestSender(threading.Thread): self.channel = self.connection.channel() def listen_to_queue(self, callback): - print("listen to queue {}".format(self.queue_name)) + self.logger.info("listen to queue {}".format(self.queue_name)) self.channel.basic_qos(prefetch_count=1) self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback) self.channel.start_consuming() def on_message(self, ch, method, properties, body): _message_count = self.cookiesPublisher.message_count() - print("message count in queue is {}".format(_message_count)) + self.logger.info("message count in queue is {}".format(_message_count)) _received_object = body.decode("UTF-8") - print(f" [x] Received {_received_object}") + self.logger.info(f" [x] Received {_received_object}") _port = random.randint(40001, 49999) - print("generated port is {}".format(_port)) - _choosed_proxy = random.choice(self.proxy_to_use_list) + self.logger.info("generated port is {}".format(_port)) + _chosen_proxy = random.choice(self.proxy_to_use_list) _proxy_to_use = {} - _proxy_to_use["http"] = _choosed_proxy["http"].format(_port) - _proxy_to_use["https"] = _choosed_proxy["https"].format(_port) + _proxy_to_use["http"] = _chosen_proxy["http"].format(_port) + _proxy_to_use["https"] = _chosen_proxy["https"].format(_port) if "glrd" in _received_object: _received_dict = json.loads(_received_object) js_data = JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], @@ -93,16 +94,16 @@ class AppointmentRequestSender(threading.Thread): rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd']) _received_cookies = _received_dict["cookiesStr"] sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, - proxy_to_use=_proxy_to_use) + proxy_to_use=_proxy_to_use, logger=self.logger) self.contact_list = filter_contacts(self.contact_list) # remove already booked contacts random.shuffle(self.contact_list) if len(self.contact_list) > 0 and is_open(): captchaResultGetter = CaptchaResultGetter() - print("contact number is {}".format(len(self.contact_list))) + self.logger.info("contact number is {}".format(len(self.contact_list))) self.contact_list = filter_contacts(self.contact_list) for con in self.contact_list: - print(con.mail) + self.logger.info(con.mail) if self.valid_csrf is None: self.valid_csrf = captchaResultGetter.get_csrf( proxy_to_use=_proxy_to_use, js_data=js_data, @@ -110,8 +111,8 @@ class AppointmentRequestSender(threading.Thread): _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, old_valid_cookie=_received_cookies) if _new_cookies is not None: - print("new cookie is " + _new_cookies) - time.sleep(random.randint(1, 5)) + # self.logger.info("new cookie is " + _new_cookies) + time.sleep(random.randint(1, 3)) # m_s_c = f.scroll m_s_c = random.randint(0, 3) m_c_c = random.randint(3, 5) # click count @@ -138,38 +139,38 @@ class AppointmentRequestSender(threading.Thread): js_le_type_data=js_le_data, old_valid_cookie=_new_cookies) if _new_le_cookies is not None: - print("new le type cookie is " + _new_le_cookies) + # self.logger.info("new le type cookie is " + _new_le_cookies) sender.cookie_str = _new_le_cookies - time.sleep(random.randint(1, 5)) + time.sleep(random.randint(1, 3)) can_continue = sender.send_request(HERMES_REGISTER, js_data, con, csrf=self.valid_csrf) else: can_continue = RequestResult.COOKIES_ERROR else: can_continue = RequestResult.COOKIES_ERROR if can_continue == RequestResult.BLOCKED: - print("cannot continue, valid_csrf is " + str(self.valid_csrf)) + self.logger.info("cannot continue, valid_csrf is " + str(self.valid_csrf)) break elif can_continue == RequestResult.PROXY_ERROR: - print("PROXY_ERROR, will not reset valid_csrf") + self.logger.info("PROXY_ERROR, will not reset valid_csrf") elif can_continue == RequestResult.COOKIES_ERROR: - print("COOKIES_ERROR, will not reset valid_csrf") + self.logger.info("COOKIES_ERROR, will not reset valid_csrf") else: - print("can continue, will reset valid_csrf") + self.logger.info("can continue, will reset valid_csrf") self.valid_csrf = None time.sleep(random.randint(1, 2)) - print("will ack method.delivery_tag: " + str(method.delivery_tag)) + self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) ch.basic_ack(delivery_tag=method.delivery_tag) else: - print("empty list") + self.logger.info("empty list") time.sleep(120) - print("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) + self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) else: - print("not a valid object") + self.logger.info("not a valid object") ch.basic_ack(delivery_tag=method.delivery_tag) def run(self): - print(threading.currentThread().name + " starts") + self.logger.info(threading.currentThread().name + " starts") self.set_up_connection() self.listen_to_queue(self.on_message) self.channel.start_consuming() diff --git a/request_sender.py b/request_sender.py index 497b151..03a1875 100644 --- a/request_sender.py +++ b/request_sender.py @@ -1,19 +1,16 @@ import datetime -import random -import time -from http.cookies import SimpleCookie +import logging +import sys from threading import Thread from db.mongo_manager import MONGO_STORE_MANAGER from excel_reader import read_contacts from models.contact_pojo import ContactPojo -from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_DE, REQUEST_DATA_QUEUE_TEST, \ - REQUEST_DATA_OBJECT, TEST_QUEUE, REGISTER_QUEUE -from queue_message.appointmentrequestsender import AppointmentRequestSender, REQUEST_DATA_QUEUE +from queue_message.CookiesPublisher import CookiesPublisher, TEST_QUEUE +from queue_message.appointmentrequestsender import AppointmentRequestSender from utiles import is_time_between -from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER -from workers.proxies_constants import PROXY_LIST_DE, PROXY_LIST_FR, MOBILE_PROXY_LIST_FR, FR_PROXY_MOB_OXY_STICKY -from workers.sender import Sender +from utils.AppLogging import init_logger +from workers.proxies_constants import MOBILE_PROXY_LIST_FR IPFIY = 'http://api.ipify.org' NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app" @@ -41,64 +38,40 @@ def filter_contacts(_contact_list: list) -> list: return _contact_list_to_book -def get_valid_csrf() -> str: - captchaResultGetter = CaptchaResultGetter() - _valid_cookie = captchaResultGetter.get_valid_cookie() - # while _valid_cookie is None: - # _valid_cookie = captchaResultGetter.get_valid_cookie() - new_csrf = None - while new_csrf is None and is_open(): - valid_cookie = None - if _valid_cookie is not None: - simple_cookie = SimpleCookie() - simple_cookie.load(_valid_cookie) - new_cookies = {k: v.value for k, v in simple_cookie.items()} - new_coolies_str = "" - for key in new_cookies: - print(key) - new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - print(new_coolies_str) - valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" - print(valid_cookie) - new_csrf = captchaResultGetter.get_csrf(valid_cookie) - if new_csrf is None: - _valid_cookie = None - while _valid_cookie is None: - _valid_cookie = captchaResultGetter.get_valid_cookie() - time.sleep(2) - return new_csrf - - def is_open(): return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) count = 0 +init_logger() +logger = logging.getLogger() + +logger.addHandler(logging.StreamHandler(stream=sys.stdout)) def send_appointment_request(message_queue_name, _contact_list): global count count = count + 1 + for _contact in _contact_list: + logger.info(_contact) _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) _cookiesPublisher.set_up_connection() receiver = AppointmentRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, queue_name=message_queue_name, - cookiesPublisher=_cookiesPublisher) + cookiesPublisher=_cookiesPublisher, logger=logger) print("count is " + str(count)) receiver.run() if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-15.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-16.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 5 - print("{} contacts to book".format(len(_contact_list_to_book))) + _segment_number = 2 + logger.info("{} contacts to book".format(len(_contact_list_to_book))) for i in range(0, _segment_number): - print(i) + logger.info("segment is {}".format(i)) _step = int(len(_contact_list_to_book) / _segment_number) - _sublist = _contact_list_to_book[i:_step * (i + 1)] - print(_sublist[0]) - print(_sublist[-1]) + _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) _thread1.start() diff --git a/utils/AppLogging.py b/utils/AppLogging.py new file mode 100644 index 0000000..ca8b8b8 --- /dev/null +++ b/utils/AppLogging.py @@ -0,0 +1,13 @@ +import datetime +import logging +from pathlib import Path + +LOGS_DIR = str(Path.home()) + + +def init_logger(): + logging.basicConfig(filename=LOGS_DIR + "/request_{}.log".format(str(datetime.date.today())), + filemode='a', + format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=logging.INFO) diff --git a/workers/sender.py b/workers/sender.py index e189a3f..8b861cb 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -16,12 +16,13 @@ from workers.proxies_constants import PROXY_LIST_FR class Sender: - def __init__(self, cookie_str, cookiesPublisher: CookiesPublisher, received_dict, proxy_to_use): + def __init__(self, cookie_str, cookiesPublisher: CookiesPublisher, received_dict, proxy_to_use, logger): self.store_type = "random" self.cookie = SimpleCookie() self.cookiesPublisher = cookiesPublisher # self.cookie_str = 'datadome=~pxdHFAvsQl2rvDrTzhPgCHxu~4TBcePTTE~Cy8Rgol6oMRc11gA02VRp0Z3uEDUszCjacubNu7vbfQCh27gz8RC10u_325pt_gsMmJh1ScGvOofVJiVAbEKvSEUjd82;policy=accepted;app.sig=PhjmDkq_dI49pADppDNKxpLe_G4;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;lang=fr;' self.cookie_str = cookie_str + self.logger = logger self._csrf = None self.received_dict = received_dict self.proxy_to_use = proxy_to_use @@ -74,11 +75,11 @@ class Sender: print(proxy_to_use) response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data, timeout=15) - print(response.status_code) + self.logger.info(response.status_code) if response.status_code == 200: # add to mongodb - print(response.text) - print(response.url) + self.logger.info(response.text) + self.logger.info("{}:{}".format(contact.mail, response.url)) self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=response.url) cookies_to_set = response.headers['set-cookie'] self.cookie.load(cookies_to_set) @@ -86,10 +87,10 @@ class Sender: new_coolies_str = "" for key in new_cookies: new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - print("will publish to queue {}".format(new_coolies_str)) + self.logger.info("will publish to queue {}".format(new_coolies_str)) # upload the cookie to queue self.received_dict['cookiesStr'] = new_coolies_str - print("body in json:{}".format(json.dumps(self.received_dict))) + self.logger.info("body in json:{}".format(json.dumps(self.received_dict))) self.cookiesPublisher.publish_body(json.dumps(self.received_dict)) # self.cookie_str = new_coolies_str return RequestResult.SUCCESS From 67b6a181cbc92d233494f602532a3b3a40ecda3f Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Thu, 21 Mar 2024 11:25:01 +0100 Subject: [PATCH 03/22] log to file --- queue_message/appointmentrequestsender.py | 24 ++++++++++++++++++----- workers/sender.py | 7 ++++++- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 7cdc2cc..b03d826 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -7,6 +7,7 @@ import time import pika from db.mongo_manager import MONGO_STORE_MANAGER +from models.ReserveResultPojo import ReserveResultPojo from models.contact_pojo import ContactPojo from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_pojo import JsDataPojo @@ -29,14 +30,19 @@ def is_already_sent(contact: ContactPojo) -> bool: return False -def filter_contacts(_contact_list: list) -> list: +def filter_contacts(_contact_list: list, provided_list=[]) -> list: already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + already_sent_contacts.extend(provided_list) _contact_list_to_book = [] for contact in _contact_list: _to_add = True for booked in already_sent_contacts: - if contact.mail == booked.email: - _to_add = False + if isinstance(booked, ReserveResultPojo): + if contact.mail == booked.email: + _to_add = False + else: + if contact.mail == booked.mail: + _to_add = False if _to_add: _contact_list_to_book.append(contact) @@ -49,10 +55,13 @@ def is_open(): class AppointmentRequestSender(threading.Thread): def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher, + just_send=False, queue_name=REQUEST_DATA_QUEUE): super().__init__() self.connection = None + self.just_send = just_send self.logger = logger + self.already_tried_contact_list = [] self.cookiesPublisher = cookiesPublisher self.channel = None self.valid_csrf = None @@ -95,13 +104,17 @@ class AppointmentRequestSender(threading.Thread): _received_cookies = _received_dict["cookiesStr"] sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, proxy_to_use=_proxy_to_use, logger=self.logger) - self.contact_list = filter_contacts(self.contact_list) + # remove already sent contacts + if self.just_send: + self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list) + else: + self.contact_list = filter_contacts(self.contact_list) # remove already booked contacts random.shuffle(self.contact_list) if len(self.contact_list) > 0 and is_open(): captchaResultGetter = CaptchaResultGetter() self.logger.info("contact number is {}".format(len(self.contact_list))) - self.contact_list = filter_contacts(self.contact_list) + # self.contact_list = filter_contacts(self.contact_list) for con in self.contact_list: self.logger.info(con.mail) if self.valid_csrf is None: @@ -142,6 +155,7 @@ class AppointmentRequestSender(threading.Thread): # self.logger.info("new le type cookie is " + _new_le_cookies) sender.cookie_str = _new_le_cookies time.sleep(random.randint(1, 3)) + self.already_tried_contact_list.append(con) can_continue = sender.send_request(HERMES_REGISTER, js_data, con, csrf=self.valid_csrf) else: can_continue = RequestResult.COOKIES_ERROR diff --git a/workers/sender.py b/workers/sender.py index 8b861cb..cd1033e 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -68,7 +68,8 @@ class Sender: 'surname': contact.last_name.capitalize(), 'name': contact.first_name.capitalize(), 'phone_country': "FR", 'phone_number': "+33 " + contact.phone, 'email': contact.mail, 'passport_id': contact.passport, 'processing': 'on', 'cgu': 'on'} - print(data) + self.logger.info(data) + self.logger.info("try to request for mail:{}".format(contact.mail)) print("send request with cookie:{}".format(self.cookie_str)) try: proxy_to_use = self.proxy_to_use @@ -96,6 +97,10 @@ class Sender: return RequestResult.SUCCESS else: print(response.text) + if response.status_code == 403: + # try to load seesion + # session = HTMLSession() + return RequestResult.BLOCKED return RequestResult.BLOCKED except Exception as Error: print(Error) From dead188a6914e93884dcfb8339e977dfdcb0ca0d Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Thu, 21 Mar 2024 14:55:17 +0100 Subject: [PATCH 04/22] read mail after request --- db/mongo_manager.py | 38 ++++ mail/__init__.py | 0 mail/mail_constants.py | 106 +++++++++ mail/mail_reader_all_contacts.py | 265 ++++++++++++++++++++++ models/contact_pojo.py | 8 +- models/mail_pojo.py | 36 +++ queue_message/appointmentrequestsender.py | 14 ++ request_sender.py | 7 +- workers/sender.py | 3 +- 9 files changed, 471 insertions(+), 6 deletions(-) create mode 100644 mail/__init__.py create mode 100755 mail/mail_constants.py create mode 100755 mail/mail_reader_all_contacts.py create mode 100644 models/mail_pojo.py diff --git a/db/mongo_manager.py b/db/mongo_manager.py index dc19c6e..8c1b764 100755 --- a/db/mongo_manager.py +++ b/db/mongo_manager.py @@ -7,6 +7,7 @@ from pymongo import MongoClient from models.LinkPojo import LinkPojo from models.ReserveResultPojo import ReserveResultPojo from models.contact_pojo import ContactPojo +from models.mail_pojo import MailAddress MONGO_DB_URL = "mongo.lpaconsulting.fr" CAPTCHA_ERROR_COLLECTION_PREFIX = "CAPTCHA_ERROR_" @@ -72,6 +73,43 @@ class MongoDbManager: result_list.append(ContactPojo.from_firestore_dict(document)) return result_list + def save_links_to_validate(self, link: str, mail_address: str, _all_contact_list: list): + collection_to_use = self.db[LINKS_TO_VALIDATE] + updated_at = time.strftime("%H:%M:%S", time.localtime()) + _ip_country = "FR" + # find ip_country info + for _contact in _all_contact_list: + if _contact.mail == mail_address: + _ip_country = _contact.ip_country + + if len(mail_address) > 0: + collection_to_use.replace_one(filter={'_id': mail_address, }, replacement={ + u'url': link, + u'email': mail_address, + u'ip_country': _ip_country, + "updated_at": updated_at + }, + upsert=True) + else: + collection_to_use.replace_one(filter={'_id': link, }, replacement={ + u'url': link, + u'ip_country': _ip_country, + "updated_at": updated_at + }, + upsert=True) + + def get_destination_emails(self) -> list: + collection_name = DESTINATION_EMAIL_LIST + email_list = [] + try: + collection_to_use = self.db[collection_name] + for document in collection_to_use.find(): + email_list.append(MailAddress.from_firestore_dict(document)) + except Exception as error: + self.logger.info(error) + print(error) + return email_list + def get_links_to_validate(self) -> list: collection_name = LINKS_TO_VALIDATE link_list = [] diff --git a/mail/__init__.py b/mail/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mail/mail_constants.py b/mail/mail_constants.py new file mode 100755 index 0000000..7e88937 --- /dev/null +++ b/mail/mail_constants.py @@ -0,0 +1,106 @@ +import imaplib + +from imapclient import IMAPClient + +DOMAIN_YAHOO = "yahoo.com" +DOMAIN_SINA = "sina.com" +DOMAIN_HOTMAIL = "hotmail.com" +DOMAIN_TIM_IT = "tim.it" +DOMAIN_163 = "163.com" +DOMAIN_RAMBLER_RU = "rambler.ru" +DOMAIN_ALICE_IT = "alice.it" +DOMAIN_MARS_DTI_NE_JP = "mars.dti.ne.jp" +DOMAN_BTVM_NE_JP = "btvm.ne.jp" +DOMAN_AURORA_DTI_NE_JP = "aurora.dti.ne.jp" +DOMAN_GMAIL = "gmail.com" +DOMAIN_GMX = "gmx.com" +DOMAIN_GMX_FR = "gmx.fr" +DOMAIN_GMX_US = "gmx.us" +DOMAIN_GMX_CH = "gmx.ch" +DOMAIN_ONET = "onet.pl" +DOMAIN_NAVER = "naver.com" +DOMAIN_INBOX_LV = "inbox.lv" + +DOMAIN_PISS_MAIL = "pissmail.com" +DOMAIN_INCEL_EMAIL = "incel.email" +DOMAIN_SHITPOSTING_EXPERT = "shitposting.expert" +DOMAIN_HATESJE_WS = "hatesje.ws" +DOMAIN_CHILD_PIZZA = "child.pizza" +DOMAIN_GENOCIDE_FUN = "genocide.fun" +DOMAIN_DMC_CHAT = "dmc.chat" + +AOL_IMAP_SERVER = "imap.aol.com" +IMAP_SERVER_163 = "imap.163.com" +IMAP_SERVER_SINA = "imap.sina.com" +YAHOO_IMAP_SERVER = "imap.mail.yahoo.com" +HOTMAIL_IMAP_SERVER = "outlook.office365.com" +RAMBLER_IMAP_SERVER = "imap.rambler.ru" +ALICE_IMAP_SERVER = "in.alice.it" +TIME_IT_SERVER = "imap.tim.it" +MARS_DTI_NE_JP_SERVER = "imap.cm.dream.jp" +NAVER_SERVER = "imap.naver.com" +BTVM_NE_JP = "imap.btvm.ne.jp" +SEREVER_GMAIL = "imap.gmail.com" +SERVER_IMAGE_ONET = "imap.poczta.onet.pl" +SERVER_GMX = "imap.gmx.com" +SERVER_PISS_MAIL = "mail.pissmail.com" +INBOX_LV = "mail.inbox.lv" + + +def show_folders(imap) -> list: + folders = [] + isImapClient = isinstance(imap, IMAPClient) + if not isImapClient: + for i in imap.list()[1]: + l = i.decode().split(' "/" ') + if len(l) > 1: + folders.append(l[1]) + if len(folders) == 0: + folders.append('INBOX') + return folders + else: + list = imap.list_folders() + for i in list: + name = i[-1] + folders.append(name) + return folders + + +def create_imap(login: str): + # create an IMAP4 class with SSL + if DOMAIN_163 in login: + imap = IMAPClient(IMAP_SERVER_163, use_uid=True) + elif DOMAIN_YAHOO in login: + imap = imaplib.IMAP4_SSL(YAHOO_IMAP_SERVER) + elif DOMAIN_GMX in login or DOMAIN_GMX_FR in login or DOMAIN_GMX_US in login or DOMAIN_GMX_CH in login: + imap = imaplib.IMAP4_SSL(SERVER_GMX) + elif DOMAIN_SINA in login: + # imap = imaplib.IMAP4_SSL(IMAP_SERVER_SINA) + imap = IMAPClient(IMAP_SERVER_SINA, use_uid=True) + elif DOMAIN_HOTMAIL in login: + imap = imaplib.IMAP4_SSL(HOTMAIL_IMAP_SERVER) + elif DOMAIN_RAMBLER_RU in login: + imap = imaplib.IMAP4_SSL(RAMBLER_IMAP_SERVER) + elif DOMAN_BTVM_NE_JP in login: + imap = imaplib.IMAP4_SSL(BTVM_NE_JP) + elif DOMAN_GMAIL in login: + imap = imaplib.IMAP4_SSL(SEREVER_GMAIL, port=993) + elif DOMAIN_ONET in login: + imap = IMAPClient(SERVER_IMAGE_ONET, use_uid=True) + elif DOMAIN_TIM_IT in login: + imap = imaplib.IMAP4(TIME_IT_SERVER) + elif DOMAIN_ALICE_IT in login: + imap = imaplib.IMAP4(ALICE_IMAP_SERVER, port=143) + elif DOMAIN_MARS_DTI_NE_JP in login: + imap = imaplib.IMAP4(MARS_DTI_NE_JP_SERVER, port=143) + elif DOMAN_AURORA_DTI_NE_JP in login: + imap = imaplib.IMAP4(MARS_DTI_NE_JP_SERVER, port=143) + elif DOMAIN_NAVER in login: + imap = imaplib.IMAP4_SSL(NAVER_SERVER, port=993) + elif DOMAIN_INBOX_LV in login: + imap = imaplib.IMAP4_SSL(INBOX_LV, port=993) + elif DOMAIN_PISS_MAIL in login or DOMAIN_CHILD_PIZZA in login or DOMAIN_DMC_CHAT in login or DOMAIN_GENOCIDE_FUN in login or DOMAIN_HATESJE_WS in login or DOMAIN_INCEL_EMAIL in login or DOMAIN_SHITPOSTING_EXPERT in login: + imap = imaplib.IMAP4_SSL(SERVER_PISS_MAIL, port=993) + else: + imap = imaplib.IMAP4_SSL(AOL_IMAP_SERVER) + return imap diff --git a/mail/mail_reader_all_contacts.py b/mail/mail_reader_all_contacts.py new file mode 100755 index 0000000..3e2232f --- /dev/null +++ b/mail/mail_reader_all_contacts.py @@ -0,0 +1,265 @@ +import datetime +import email +import re +from builtins import list +from concurrent.futures import ThreadPoolExecutor +from datetime import time +from email.header import decode_header +from email.message import Message + +from imapclient import IMAPClient +from db.mongo_manager import MONGO_STORE_MANAGER +from mail.mail_constants import DOMAIN_HOTMAIL, create_imap +from models.mail_pojo import MailPojo + +VALIDATION_URL_SUBJECT_fr = 'Validation de votre demande de rendez-vous' +VALIDATION_URL_SUBJECT_EN = 'Please confirm your appointment request' +VALIDATION_URL_REGEX = """https:\/\/rendezvousparis.hermes.com\/client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" +PART_VALIDATION_URL_REGEX = """client\/register\/[A-Z0-9]+\/validate.code=[A-Z0-9]+""" +HERMES_EMAIL = "no-reply@hermes.com" +EMAIL_ADDRESS_REGEX = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b' + +date_format = "%d-%b-%Y" # DD-Mon-YYYY e.g., 3-Mar-2014 +REDIRECTION_MAILS = "appointment2022@aol.com, chenpeijun@aol.com,hongjiang176@aol.com,ciyuexie@aol.com,rutger.62@aol.com,ciccidaniel@aol.com,armasgoodman@aol.com,wknd.gemerine@aol.com,rafmail1981@aol.com,tonovichivanenaki@aol.com,hetland.ari@aol.com,mateusiversen@aol.com,lacerdaraffaello@aol.com,anasida76@aol.com,liamolinari@aol.com,sen70zib@aol.com,mezeiderrick@aol.com,stanisl49avchic@aol.com,damcvrobaneuron@aol.com,suyzanna_fleona@aol.com,dxealing.dissa@aol.com,hogg.karen@aol.com,obocharovamarina@aol.com,buchholzjohann@aol.com,orn.cecchini@aol.com,percivaltorgersen@aol.com,candalgudrun@aol.com,filimonis.76@aol.com,bengann_100@aol.com,axelhanne@aol.com,tiffanylarochelle@aol.com,nicoleta.r@aol.com,eichenbaum.1963@aol.com,kotensasharev@aol.com,samognat32@aol.com,edem_headshot@aol.com,kozmakuzmich1960@aol.com,damonsvensson@aol.com,anders.riva@aol.com,caiminwei123@gmail.com,yulingguo086@gmail.com,yingxiaolu086@gmail.com,lijiazhen0035@gmail.com,fangp370@gmail.com,huangyayu10086@gmail.com,fuziyuan110@gmail.com,xinyingdu886@gmail.com,yasiaforever.1971@aol.com,lukaszfidalgo@aol.com,zaichi29@aol.com,prostotakitak.1974@aol.com,mo90nroe@aol.com,blonde.87@aol.com,dimidrol.1969@aol.com" + + +def check_email_address(email): + # pass the regular expression + # and the string into the fullmatch() method + if (re.fullmatch(EMAIL_ADDRESS_REGEX, email)): + print("Valid Email") + return True + else: + print("Invalid Email") + return False + + +def find_from_mail(param): + from_address, encoded_algo = param[0] + if isinstance(from_address, bytes): + from_address = from_address.decode(encoded_algo) + if not check_email_address(from_address) and len(param) == 2: + from_address, new_encode = param[1] + if new_encode is None: + new_encode = encoded_algo + if isinstance(from_address, bytes): + from_address = from_address.decode(new_encode) + return from_address.strip(" ").strip(">").strip("<") + return from_address.strip(" ").strip(">").strip("<") + + +class MailReader(): + def __init__(self, login, password): + self.login = login + self.password = password + + @staticmethod + def show_folders(imap) -> list: + folders = [] + isImapClient = isinstance(imap, IMAPClient) + if not isImapClient: + for i in imap.list()[1]: + l = i.decode().split(' "/" ') + folders.append(l[1]) + return folders + else: + list = imap.list_folders() + for i in list: + name = i[-1] + folders.append(name) + return folders + + def read_emails(self, mails_messages: list) -> list: + imap = create_imap(self.login) + isImapClient = isinstance(imap, IMAPClient) + print("isImapClient is " + str(isImapClient)) + if isImapClient: + # authenticate + dat = imap.login(self.login, str(self.password)) + print("type is {} for {}".format(dat, self.login)) + else: + responseType, dat = imap.login(self.login, str(self.password)) + print("type is {} for {}".format(responseType, self.login)) + + mail_list = [] + print("read mails from {}".format(self.login)) + if not isImapClient: + folder_list = self.show_folders(imap) + for folder in folder_list: + print("folder is {}".format(folder)) + mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_fr, + folder=folder)) + mail_list.extend(self._get_messages_from_folder(imap, subject=VALIDATION_URL_SUBJECT_EN, + folder=folder)) + else: + folder_list = self.show_folders(imap) + for folder in folder_list: + print("folder is " + folder) + mail_list.extend(self._get_messages_from_folder_for_imapclient(imap, folder=folder)) + if DOMAIN_HOTMAIL in self.login: + mail_list.extend( + self._get_messages_from_folder_for_imapclient(imap, folder="Junk")) + if not isImapClient: + imap.close() + imap.logout() + mails_messages.extend(mail_list) + return mail_list + + def _get_messages_from_folder(self, imap, subject, folder="INBOX") -> list: + imap.select(folder) + mail_messages = [] + typ, data = imap.search(None, '(SUBJECT "{}" SINCE "{}")'.format(subject, + datetime.datetime.today().strftime( + date_format))) + for i in data[0].split(): + # fetch the email message by ID + res, msg = imap.fetch(i.decode("utf-8"), "(RFC822)") + body = '' + for response in msg: + if isinstance(response, tuple): + # parse a bytes email into a message object + msg = email.message_from_bytes(response[1]) + # decode the email subject + subject, subject_encoded = decode_header(msg["Subject"])[0] + received_date = msg["Date"] + if isinstance(subject, bytes): + # if it's a bytes, decode to str + subject = subject.decode(subject_encoded) + # decode email sender + from_address = find_from_mail(decode_header(msg.get("From"))) + to_email = find_from_mail(decode_header(msg.get("To"))) + print("Email:", self.login) + print("From:", from_address) + print("To:", to_email) + print("Subject:", subject) + # if the email message is multipart + if msg.is_multipart(): + # iterate over email parts + for part in msg.walk(): + try: + # get the email body + payloads = part.get_payload() + if isinstance(payloads, list): + for payload in payloads: + if isinstance(payload, Message): + body = body + payload.get_payload(decode=True).decode("iso-8859-1") + # print(body) + except Exception as Error: + print(Error) + else: + body = msg.get_payload(decode=True).decode() + print(body) + if VALIDATION_URL_SUBJECT_fr in subject or VALIDATION_URL_SUBJECT_EN in subject: + mail = MailPojo(subject=subject, body=body, from_address=from_address) + if to_email is None: + mail.to_address = self.login + else: + mail.to_address = to_email + mail.mail_address = self.login + mail_messages.append(mail) + return mail_messages + + def _get_messages_from_folder_for_imapclient(self, imap, folder="INBOX") -> list: + mail_messages = [] + search_terms = 'SINCE "{}"'.format( + datetime.datetime.today().strftime( + date_format)) + print("search terms is " + search_terms) + imap.select_folder(folder) + messages = imap.search(['SINCE', datetime.datetime.today()]) + print("%d messages from our best friend" % len(messages)) + for uid, message_data in imap.fetch(messages, 'RFC822').items(): + try: + email_message = email.message_from_bytes(message_data[b'RFC822']) + from_address = email_message.get('FROM') + subject = email_message.get('subject') + # print("{}, {},{}".format(from_address, subject, email_message)) + body = "" + if "no-reply@hermes.com" in from_address or "appointment2022@aol.com": + for part in email_message.walk(): + print(part.get_content_type()) + if part.get_content_type() == "text/html": + body = body + part.get_payload(decode=True).decode("utf-8") + elif part.get_content_type() == "text/plain": + body = body + part.get_payload() + if VALIDATION_URL_SUBJECT_fr in subject or VALIDATION_URL_SUBJECT_EN in subject: + mail = MailPojo(subject=subject, body=body, from_address=from_address) + mail.isImapClient = True + print("email is {}".format(self.login)) + print("body is {}".format(body)) + print("subject is {}".format(subject)) + mail_messages.append(mail) + except Exception as error: + print(error) + print("error trying to read email_Message for {}".format(self.login)) + return mail_messages + + +def need_to_valid_url(url: str, successful_items) -> bool: + # return True + # if len(successful_items) == 0: + # return False + print("url is :" + url) + parts = url.split('/') + id = parts[5] + if len(id) == 6: + if id == "CS93VB": + print("found") + for item in successful_items: + if item.id == id: + if item.url_validated is not None: + return not item.url_validated + else: + # if url_validated is None + if item.url_validated is not None: + return not item.url_validated + return True + return True + else: + print("id not valid:{}".format(id)) + return False + + +def need_to_check_email(mail: str, successful_items) -> bool: + print("successful_items size is " + str(len(successful_items))) + filtered_items = list(filter(lambda item: item.email == mail, successful_items)) + # has validated value + if len(filtered_items) > 0: + validated_items = list(filter( + lambda filtered_item: filtered_item.url_validated is not None and filtered_item.url_validated is True, + filtered_items)) + if len(validated_items) > 0: + return False + else: + return True + else: + return True + + +def find_links_to_validate_from_mail_list(mail_list: list): + # check time before start checking emails + contact_to_book_list = MONGO_STORE_MANAGER.get_all_contact_to_book_list() + successful_items = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + mails_messages = [] + with ThreadPoolExecutor(max_workers=len(mail_list)) as executor: + for mail in mail_list: + # check whether we need to read mail + if need_to_check_email(mail.mail, successful_items): + mail_reader = MailReader(mail.mail, mail.password) + executor.submit(mail_reader.read_emails, mails_messages) + # for mail in mail_list: + # # check whether we need to read mail + # if need_to_check_email(mail.mail, successful_items): + # mail_reader = MailReader(mail.mail, mail.password) + # mail_reader.read_emails(mails_messages) + + for mail in mails_messages: + match = re.search(VALIDATION_URL_REGEX, mail.body) + if match: + url = match.group(0) + if need_to_valid_url(url, successful_items): + print("need to validate url: " + url) + MONGO_STORE_MANAGER.save_links_to_validate(url, mail.to_address, + _all_contact_list=contact_to_book_list) + else: + print("do not need to click url --> {}".format(mail.mail_address)) diff --git a/models/contact_pojo.py b/models/contact_pojo.py index 4c66395..b841c20 100755 --- a/models/contact_pojo.py +++ b/models/contact_pojo.py @@ -39,13 +39,15 @@ class ContactPojo: @staticmethod def from_firestore_dict(source): - ccid = source['ccid'] phone = source['phone'] - position = source['position'] passport = source['passport'] email = source['mail'] last_name = source['last_name'] first_name = source['first_name'] - result = ContactPojo(ccid=ccid, phone_number=phone, passport_number=passport, position=position, mail=email, + ip_country = "FR" + if source.get('ip_country'): + ip_country = source['ip_country'] + result = ContactPojo(phone_number=phone, passport_number=passport, mail=email, last_name=last_name, first_name=first_name) + result.ip_country = ip_country return result diff --git a/models/mail_pojo.py b/models/mail_pojo.py new file mode 100644 index 0000000..fb054e0 --- /dev/null +++ b/models/mail_pojo.py @@ -0,0 +1,36 @@ +class MailAddress: + def __init__(self, mail, password): + self.mail = mail + self.password = password + + def __repr__(self): + return "邮箱:{}, 密码:{}".format(self.mail, self.password) + + def to_firestore_dict(self): + dest = { + u'mail': self.mail, + u'password': self.password + } + return dest + + @staticmethod + def from_firestore_dict(source): + password = source['password'] + mail = source['mail'] + return MailAddress(mail=mail, password=password) + + +class MailPojo: + from_address: str + to_address: str + body: str + subject: str + mail_address: str = "" + isImapClient = False + + def __init__(self, from_address, body, subject): + self.body = body + self.subject = subject + self.from_address = from_address + self.isImapClient = False + self.to_address = "" diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index b03d826..5b45265 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -7,6 +7,7 @@ import time import pika from db.mongo_manager import MONGO_STORE_MANAGER +from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list from models.ReserveResultPojo import ReserveResultPojo from models.contact_pojo import ContactPojo from models.jsdata_le_pojo import JsDataLeTypePojo @@ -65,6 +66,7 @@ class AppointmentRequestSender(threading.Thread): self.cookiesPublisher = cookiesPublisher self.channel = None self.valid_csrf = None + self.list_to_retrieve_mails = sub_contact_list self.contact_list = sub_contact_list self.queue_name = queue_name self.proxy_to_use_list = proxy_to_use_list @@ -175,6 +177,7 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) ch.basic_ack(delivery_tag=method.delivery_tag) else: + self.retreive_invalidate_urls() self.logger.info("empty list") time.sleep(120) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) @@ -188,3 +191,14 @@ class AppointmentRequestSender(threading.Thread): self.set_up_connection() self.listen_to_queue(self.on_message) self.channel.start_consuming() + + def retreive_invalidate_urls(self): + self.logger.info("will retreive_invalidate_urls") + _mail_list = MONGO_STORE_MANAGER.get_destination_emails() + _mail_list_filtred = [] + for mail in _mail_list: + for _contact in self.list_to_retrieve_mails: + if _contact.mail == mail.mail: + _mail_list_filtred.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtred))) + find_links_to_validate_from_mail_list(_mail_list_filtred) diff --git a/request_sender.py b/request_sender.py index 03a1875..0017c8e 100644 --- a/request_sender.py +++ b/request_sender.py @@ -57,21 +57,24 @@ def send_appointment_request(message_queue_name, _contact_list): _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) _cookiesPublisher.set_up_connection() receiver = AppointmentRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, - queue_name=message_queue_name, + queue_name=message_queue_name, just_send=True, cookiesPublisher=_cookiesPublisher, logger=logger) print("count is " + str(count)) receiver.run() if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-16.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-20.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) _segment_number = 2 logger.info("{} contacts to book".format(len(_contact_list_to_book))) + last_thread = None for i in range(0, _segment_number): logger.info("segment is {}".format(i)) _step = int(len(_contact_list_to_book) / _segment_number) _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) + last_thread = _thread1 _thread1.start() + last_thread.join() diff --git a/workers/sender.py b/workers/sender.py index cd1033e..1f59f89 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -73,7 +73,7 @@ class Sender: print("send request with cookie:{}".format(self.cookie_str)) try: proxy_to_use = self.proxy_to_use - print(proxy_to_use) + # print(proxy_to_use) response = requests.post(url=url, proxies=proxy_to_use, verify=False, headers=headers, data=data, timeout=15) self.logger.info(response.status_code) @@ -100,6 +100,7 @@ class Sender: if response.status_code == 403: # try to load seesion # session = HTMLSession() + self.logger.info("blocked by hermes, mail:{}".format(contact.mail)) return RequestResult.BLOCKED return RequestResult.BLOCKED except Exception as Error: From 9a238eb6429e65ae162c20b498f258fa0cc152e8 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Thu, 21 Mar 2024 17:26:42 +0100 Subject: [PATCH 05/22] read mail after request --- queue_message/appointmentrequestsender.py | 23 ++++++++++++++--------- request_sender.py | 4 ++-- workers/sender.py | 1 + 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 5b45265..b7e0849 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -70,6 +70,7 @@ class AppointmentRequestSender(threading.Thread): self.contact_list = sub_contact_list self.queue_name = queue_name self.proxy_to_use_list = proxy_to_use_list + self.already_read_emails = False def set_up_connection(self): self.connection = pika.BlockingConnection( @@ -193,12 +194,16 @@ class AppointmentRequestSender(threading.Thread): self.channel.start_consuming() def retreive_invalidate_urls(self): - self.logger.info("will retreive_invalidate_urls") - _mail_list = MONGO_STORE_MANAGER.get_destination_emails() - _mail_list_filtred = [] - for mail in _mail_list: - for _contact in self.list_to_retrieve_mails: - if _contact.mail == mail.mail: - _mail_list_filtred.append(mail) - self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtred))) - find_links_to_validate_from_mail_list(_mail_list_filtred) + if not self.already_read_emails: + self.logger.info("will retreive_invalidate_urls") + _mail_list = MONGO_STORE_MANAGER.get_destination_emails() + _mail_list_filtred = [] + for mail in _mail_list: + for _contact in self.list_to_retrieve_mails: + if _contact.mail == mail.mail: + _mail_list_filtred.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtred))) + find_links_to_validate_from_mail_list(_mail_list_filtred) + self.already_read_emails = True + else: + self.logger.info("already read emails, will not retreive_invalidate_urls") diff --git a/request_sender.py b/request_sender.py index 0017c8e..dac772e 100644 --- a/request_sender.py +++ b/request_sender.py @@ -64,10 +64,10 @@ def send_appointment_request(message_queue_name, _contact_list): if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-20.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-19.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 2 + _segment_number = 1 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): diff --git a/workers/sender.py b/workers/sender.py index 1f59f89..99b0ec1 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -94,6 +94,7 @@ class Sender: self.logger.info("body in json:{}".format(json.dumps(self.received_dict))) self.cookiesPublisher.publish_body(json.dumps(self.received_dict)) # self.cookie_str = new_coolies_str + self.logger.info("successful request, mail:{}".format(contact.mail)) return RequestResult.SUCCESS else: print(response.text) From abe51a0d201e79191d4dc15602913372376a8e15 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Tue, 26 Mar 2024 22:35:33 +0100 Subject: [PATCH 06/22] update cfpfe and stcfp data to 4.24.4 --- models/jsdata_pojo.py | 4 ++-- workers/captcha_result_getter.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 8f90fca..08a55d0 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -265,8 +265,8 @@ class JsDataPojo: self.emt = False self.bfr = False self.dbov = False - self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2" # 4.24.1 - self.stcfp = "Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6NzYzMDIpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWIgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6OTA2MjIpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUxNzMz" # 4.24.1 + self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbbigzODYpXSgnXHg2Mlx4NzJceDZmXHg3N1x4NzNceDY1XHg3Mlx4NjZceDZjXHg2Zlx4NzdceDJkXHg2M1x4NmZceDZlXHg3NFx4NjFceDY5XHg2ZVx4NjVceDcyJyk7aWYobyl7IWZ1bmN0aW9uIHQoKXt2YXIg" # 4.24.4 + self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.24.4 self.ckwa = True def to_url_encoded_json(self): diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index 9248179..133a37a 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -101,7 +101,7 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "4.24.2" + _tag_version = "4.24.4" _raw_data = "jsData={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( js_data.to_url_encoded_json(), old_valid_cookie, _tag_version) print("raw data is " + _raw_data) @@ -168,7 +168,7 @@ class CaptchaResultGetter: # _le_js_raw_data = self.get_le_raw_data_from_js_data(js_le_type_data=js_le_type_data, # old_valid_cookie=old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie) - _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.2".format( + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.4".format( js_le_type_data.to_url_encoded_json(), mousemove_count, click_count, scroll_count, touch_count, touch_count, touch_move, key_count, From 4a5c2b4b0ec39771b25ea03a0ed47bc07414ed59 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Wed, 27 Mar 2024 11:10:41 +0100 Subject: [PATCH 07/22] update cfpfe and stcfp data to 4.24.3 --- models/jsdata_pojo.py | 4 ++-- queue_message/CookiesPublisher.py | 1 + request_sender.py | 8 ++++---- workers/proxies_constants.py | 7 ++++++- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 08a55d0..5e399f9 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -265,8 +265,8 @@ class JsDataPojo: self.emt = False self.bfr = False self.dbov = False - self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbbigzODYpXSgnXHg2Mlx4NzJceDZmXHg3N1x4NzNceDY1XHg3Mlx4NjZceDZjXHg2Zlx4NzdceDJkXHg2M1x4NmZceDZlXHg3NFx4NjFceDY5XHg2ZVx4NjVceDcyJyk7aWYobyl7IWZ1bmN0aW9uIHQoKXt2YXIg" # 4.24.4 - self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.24.4 + self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbbigzODYpXSgnXHg2Mlx4NzJceDZmXHg3N1x4NzNceDY1XHg3Mlx4NjZceDZjXHg2Zlx4NzdceDJkXHg2M1x4NmZceDZlXHg3NFx4NjFceDY5XHg2ZVx4NjVceDcyJyk7aWYobyl7IWZ1bmN0aW9uIHQoKXt2YXIg" # 4.24.3 + self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.24.3 self.ckwa = True def to_url_encoded_json(self): diff --git a/queue_message/CookiesPublisher.py b/queue_message/CookiesPublisher.py index 7162eb5..5b1dcd3 100644 --- a/queue_message/CookiesPublisher.py +++ b/queue_message/CookiesPublisher.py @@ -5,6 +5,7 @@ REQUEST_DATA_QUEUE = 'REQUEST_DATA' # REQUEST_DATA_QUEUE_TEST = 'REQUEST_DATA_TEST' REQUEST_DATA_QUEUE_TEST = 'REQUEST_DATA_TEST' TEST_QUEUE = 'TEST_QUEUE' +SHARED_OBJECT = 'SHARED_OBJECT' REQUEST_DATA_QUEUE_DE = 'REQUEST_DATA_DE' REQUEST_DATA_OBJECT = 'REQUEST_DATA_OBJECT' REGISTER_QUEUE = 'REGISTER_QUEUE' diff --git a/request_sender.py b/request_sender.py index dac772e..27bff36 100644 --- a/request_sender.py +++ b/request_sender.py @@ -6,7 +6,7 @@ from threading import Thread from db.mongo_manager import MONGO_STORE_MANAGER from excel_reader import read_contacts from models.contact_pojo import ContactPojo -from queue_message.CookiesPublisher import CookiesPublisher, TEST_QUEUE +from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE from queue_message.appointmentrequestsender import AppointmentRequestSender from utiles import is_time_between from utils.AppLogging import init_logger @@ -64,17 +64,17 @@ def send_appointment_request(message_queue_name, _contact_list): if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-19.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-26-2.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 1 + _segment_number = 5 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): logger.info("segment is {}".format(i)) _step = int(len(_contact_list_to_book) / _segment_number) _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] - _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) + _thread1 = Thread(target=send_appointment_request, args=(SHARED_OBJECT, _sublist)) last_thread = _thread1 _thread1.start() last_thread.join() diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index 9e08c1b..fbdaa74 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -52,6 +52,11 @@ FR_PROXY_MOB_OXY_STICKY = { 'https': 'http://customer-rendezvousmob-cc-FR:Rdv202220212023@fr-pr.oxylabs.io:{}' } +FR_PROXY_RES_OXY_STICKY = { + 'http': 'http://customer-rendezvous-cc-FR:Rdv202220212023@fr-pr.oxylabs.io:{}', + 'https': 'http://customer-rendezvous-cc-FR:Rdv202220212023@fr-pr.oxylabs.io:{}' +} + DE_PROXY_MOBILE = { 'http': 'http://8153587-mobile-country-DE:jp50x9jmo@194.88.106.169:11842', 'https': 'http://8153587-mobile-country-DE:jp50x9jmo@194.88.106.169:11842' @@ -66,7 +71,7 @@ DE_PROXY_RES = { # PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE] # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] -PROXY_LIST_FR = [FR_PROXY_RES_OXY] +PROXY_LIST_FR = [FR_PROXY_MOB_OXY] MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY] # PROXY_LIST = [FR_PROXY_MOBILE, FR_ # PROXY_RES] From e08483fe94848557ae8276b565ac6258d7ae67eb Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Wed, 27 Mar 2024 11:59:41 +0100 Subject: [PATCH 08/22] update cfpfe and stcfp data to 4.24.3 --- workers/captcha_result_getter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index 133a37a..e4b2ae2 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -101,7 +101,7 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "4.24.4" + _tag_version = "4.24.3" _raw_data = "jsData={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( js_data.to_url_encoded_json(), old_valid_cookie, _tag_version) print("raw data is " + _raw_data) @@ -168,7 +168,7 @@ class CaptchaResultGetter: # _le_js_raw_data = self.get_le_raw_data_from_js_data(js_le_type_data=js_le_type_data, # old_valid_cookie=old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie) - _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.4".format( + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.3".format( js_le_type_data.to_url_encoded_json(), mousemove_count, click_count, scroll_count, touch_count, touch_count, touch_move, key_count, From 5dd90359a2a77ca6f8acfa394f006b8db27694b0 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 29 Mar 2024 16:03:42 +0100 Subject: [PATCH 09/22] do not read email for successful requests --- docs/generate_jsdata.json | 127 ++++++++++++++++++++++ excel_reader.py | 1 + models/contact_pojo.py | 2 + models/jsdata_pojo.py | 8 +- queue_message/appointmentrequestsender.py | 17 ++- request_sender.py | 10 +- workers/captcha_result_getter.py | 4 +- workers/proxies_constants.py | 5 +- workers/sender.py | 14 ++- 9 files changed, 171 insertions(+), 17 deletions(-) create mode 100644 docs/generate_jsdata.json diff --git a/docs/generate_jsdata.json b/docs/generate_jsdata.json new file mode 100644 index 0000000..27d1120 --- /dev/null +++ b/docs/generate_jsdata.json @@ -0,0 +1,127 @@ +{ + "opts": "endpoint,ajaxListenerPath", + "ttst": 93, + "ifov": false, + "hc": 2, + "br_oh": 663, + "br_ow": 384, + "ua": "Mozilla/5.0 (Linux; Android 11) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36", + "wbd": false, + "tagpu": 9.185432887936322, + "wdif": false, + "wdifrm": false, + "npmtm": false, + "br_h": 663, + "br_w": 384, + "isf": true, + "nddc": 1, + "rs_h": 811, + "rs_w": 384, + "rs_cd": 24, + "phe": false, + "nm": false, + "jsf": false, + "lg": "fr-FR", + "pr": 2.8125, + "ars_h": 811, + "ars_w": 384, + "tz": -60, + "str_ss": true, + "str_ls": true, + "str_idb": true, + "str_odb": true, + "plgod": false, + "plg": 2, + "plgne": "err", + "plgre": "err", + "plgof": "err", + "plggt": "err", + "pltod": false, + "hcovdr": false, + "hcovdr2": false, + "plovdr": false, + "plovdr2": false, + "ftsovdr": false, + "ftsovdr2": false, + "lb": false, + "eva": 33, + "lo": false, + "ts_mtp": 5, + "ts_tec": true, + "ts_tsa": true, + "vnd": "Google Inc.", + "bid": "NA", + "mmt": "empty", + "plu": "5gQIEKN,Iv26GLka", + "hdn": false, + "awe": false, + "geb": false, + "dat": false, + "med": "defined", + "aco": "probably", + "acots": false, + "acmp": "probably", + "acmpts": true, + "acw": "probably", + "acwts": false, + "acma": "maybe", + "acmats": false, + "acaa": "probably", + "acaats": true, + "ac3": "", + "ac3ts": false, + "acf": "probably", + "acfts": false, + "acmp4": "maybe", + "acmp4ts": false, + "acmp3": "probably", + "acmp3ts": false, + "acwm": "maybe", + "acwmts": false, + "ocpt": false, + "vco": "", + "vcots": false, + "vch": "probably", + "vchts": true, + "vcw": "probably", + "vcwts": true, + "vc3": "maybe", + "vc3ts": false, + "vcmp": "", + "vcmpts": false, + "vcq": "", + "vcqts": false, + "vc1": "probably", + "vc1ts": true, + "dvm": 4, + "sqt": false, + "so": "portrait-primary", + "wdw": true, + "cokys": "bG9hZFRpbWVzY3NpL=", + "ecpc": false, + "lgs": true, + "lgsod": false, + "psn": true, + "edp": false, + "addt": true, + "wsdc": true, + "ccsr": true, + "nuad": true, + "bcda": true, + "idn": true, + "capi": false, + "svde": false, + "vpbq": true, + "ucdv": false, + "spwn": false, + "emt": false, + "bfr": false, + "dbov": false, + "cfpfe": "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2", + "stcfp": "Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6NzYzMDIpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWIgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6OTA2MjIpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUxNzMz", + "ckwa": true, + "glrd": "Adreno (TM) 630", + "glvd": "Qualcomm", + "wwl": false, + "jset": 1710598314 +} diff --git a/excel_reader.py b/excel_reader.py index 0f98b58..ad75e5b 100755 --- a/excel_reader.py +++ b/excel_reader.py @@ -30,6 +30,7 @@ def read_contacts(file_name) -> list: first_name=first_name, passport_number=contact_dict['passport'], mail=contact_dict['email']) + contact.store = contact_dict['store'] contact_list.append(contact) return contact_list diff --git a/models/contact_pojo.py b/models/contact_pojo.py index b841c20..54102f3 100755 --- a/models/contact_pojo.py +++ b/models/contact_pojo.py @@ -10,6 +10,7 @@ class ContactPojo: mail: str ccid: str position: int + store: str note: str def __init__(self, phone_number: str, passport_number: str, last_name: str, first_name: str, mail: str, @@ -31,6 +32,7 @@ class ContactPojo: u'last_name': self.last_name, u'first_name': self.first_name, u'mail': self.mail, + u'store': self.store, u'ccid': self.ccid, u'position': self.position } diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 5e399f9..93f29f2 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -9,7 +9,7 @@ import jsonpickle @dataclass class JsDataPojo: - opts: str + # opts: str ttst: int ifov: bool hc: int @@ -137,7 +137,7 @@ class JsDataPojo: def __init__(self, glrd, glvd, hc, br_oh, ua, br_ow, br_h, br_w, rs_h, rs_w, rs_cd, ars_h, ars_w, plg, eva, plu, vnd, pr, ts_mtp, dvm): - self.opts = "endpoint,ajaxListenerPath" + # self.opts = "endpoint,ajaxListenerPath" self.ttst = random.randint(38, 148) self.ifov = False self.hc = hc @@ -265,8 +265,8 @@ class JsDataPojo: self.emt = False self.bfr = False self.dbov = False - self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgbj10LG89ZG9jdW1lbnRbbigzODYpXSgnXHg2Mlx4NzJceDZmXHg3N1x4NzNceDY1XHg3Mlx4NjZceDZjXHg2Zlx4NzdceDJkXHg2M1x4NmZceDZlXHg3NFx4NjFceDY5XHg2ZVx4NjVceDcyJyk7aWYobyl7IWZ1bmN0aW9uIHQoKXt2YXIg" # 4.24.3 - self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.24.3 + self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2" # 4.25.0 + self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.25.0 self.ckwa = True def to_url_encoded_json(self): diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index b7e0849..a1bbec6 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -34,6 +34,7 @@ def is_already_sent(contact: ContactPojo) -> bool: def filter_contacts(_contact_list: list, provided_list=[]) -> list: already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() already_sent_contacts.extend(provided_list) + _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() _contact_list_to_book = [] for contact in _contact_list: _to_add = True @@ -44,6 +45,11 @@ def filter_contacts(_contact_list: list, provided_list=[]) -> list: else: if contact.mail == booked.mail: _to_add = False + # 如果已经收到链接了,就不要再请求 + for link_to_validate in _link_to_validate_list: + if contact.mail == link_to_validate.email: + print("{}: link already received".format(contact.mail)) + _to_add = False if _to_add: _contact_list_to_book.append(contact) @@ -127,7 +133,7 @@ class AppointmentRequestSender(threading.Thread): _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, old_valid_cookie=_received_cookies) if _new_cookies is not None: - # self.logger.info("new cookie is " + _new_cookies) + self.logger.info("new cookie is " + _new_cookies) time.sleep(random.randint(1, 3)) # m_s_c = f.scroll m_s_c = random.randint(0, 3) @@ -160,6 +166,14 @@ class AppointmentRequestSender(threading.Thread): time.sleep(random.randint(1, 3)) self.already_tried_contact_list.append(con) can_continue = sender.send_request(HERMES_REGISTER, js_data, con, csrf=self.valid_csrf) + if can_continue == RequestResult.SUCCESS: + # 让服务器读取成功的约会 + try: + self.logger.info("try to remove success contact from list to retrieve mails") + self.list_to_retrieve_mails.remove(con) + except Exception as e: + self.logger.info("exception while remove success contact from list to retrieve mails") + print(e) else: can_continue = RequestResult.COOKIES_ERROR else: @@ -196,6 +210,7 @@ class AppointmentRequestSender(threading.Thread): def retreive_invalidate_urls(self): if not self.already_read_emails: self.logger.info("will retreive_invalidate_urls") + time.sleep(30) _mail_list = MONGO_STORE_MANAGER.get_destination_emails() _mail_list_filtred = [] for mail in _mail_list: diff --git a/request_sender.py b/request_sender.py index 27bff36..f148f6a 100644 --- a/request_sender.py +++ b/request_sender.py @@ -26,12 +26,18 @@ def is_already_sent(contact: ContactPojo) -> bool: def filter_contacts(_contact_list: list) -> list: already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() _contact_list_to_book = [] for contact in _contact_list: _to_add = True for booked in already_sent_contacts: if contact.mail == booked.email: _to_add = False + #如果已经收到链接了,就不要再请求 + for link_to_validate in _link_to_validate_list: + if contact.mail == link_to_validate.email: + logger.info("{}: link already received".format(contact.mail)) + _to_add = False if _to_add: _contact_list_to_book.append(contact) @@ -64,10 +70,10 @@ def send_appointment_request(message_queue_name, _contact_list): if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-26-2.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-25.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 5 + _segment_number = 2 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index e4b2ae2..bdf9243 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -101,7 +101,7 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "4.24.3" + _tag_version = "4.25.0" _raw_data = "jsData={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( js_data.to_url_encoded_json(), old_valid_cookie, _tag_version) print("raw data is " + _raw_data) @@ -168,7 +168,7 @@ class CaptchaResultGetter: # _le_js_raw_data = self.get_le_raw_data_from_js_data(js_le_type_data=js_le_type_data, # old_valid_cookie=old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie) - _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.24.3".format( + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.25.0".format( js_le_type_data.to_url_encoded_json(), mousemove_count, click_count, scroll_count, touch_count, touch_count, touch_move, key_count, diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index fbdaa74..3709d9a 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -72,7 +72,4 @@ DE_PROXY_RES = { # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] PROXY_LIST_FR = [FR_PROXY_MOB_OXY] -MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY] -# PROXY_LIST = [FR_PROXY_MOBILE, FR_ -# PROXY_RES] -PROXY_LIST_DE = [DE_PROXY_RES, DE_PROXY_MOBILE] +MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY] diff --git a/workers/sender.py b/workers/sender.py index 99b0ec1..6ea2316 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -28,7 +28,7 @@ class Sender: self.proxy_to_use = proxy_to_use self.cookie.load(self.cookie_str) - def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): + def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str, store_type: str): # create the message if url == "https://rendezvousparis.hermes.com/client/welcome": return @@ -37,7 +37,7 @@ class Sender: firstName=contact.first_name, lastName=contact.last_name, email=contact.mail, passport=contact.passport, ccid=contact.ccid) result.id = id - result.store_type = self.store_type + result.store_type = store_type result.created_at = time.strftime("%H:%M:%S", time.localtime()) collection_name = str(datetime.date.today()) MONGO_STORE_MANAGER.insert_reserve_result(collection_name=collection_name, reserve=result) @@ -57,6 +57,11 @@ class Sender: else: self._csrf = csrf _csrf = csrf + # 默认为空,也就是随机 + _selected_store = '' + if contact.store is not None and contact.store != "random": + _selected_store = contact.store + self.logger.info("store is:{}".format(_selected_store)) headers = {'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': js_data.ua, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', @@ -64,7 +69,7 @@ class Sender: 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Dest': 'document', 'Accept-Language': 'fr-FR,fr;q=0.6'} - data = {'check': '', '_csrf': _csrf, 'prefer': '', + data = {'check': '', '_csrf': _csrf, 'prefer': _selected_store, 'surname': contact.last_name.capitalize(), 'name': contact.first_name.capitalize(), 'phone_country': "FR", 'phone_number': "+33 " + contact.phone, 'email': contact.mail, 'passport_id': contact.passport, 'processing': 'on', 'cgu': 'on'} @@ -81,7 +86,8 @@ class Sender: # add to mongodb self.logger.info(response.text) self.logger.info("{}:{}".format(contact.mail, response.url)) - self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=response.url) + self.publish_message_to_queue(contact, status=PublishType.SUCCESS, url=response.url, + store_type=_selected_store) cookies_to_set = response.headers['set-cookie'] self.cookie.load(cookies_to_set) new_cookies = {k: v.value for k, v in self.cookie.items()} From eb0c04be0b381a5309165b69d42125a5dbf1c481 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 29 Mar 2024 16:33:58 +0100 Subject: [PATCH 10/22] use FR_PROXY_MOB_OXY_STICKY --- request_sender.py | 4 ++-- workers/proxies_constants.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/request_sender.py b/request_sender.py index f148f6a..69470be 100644 --- a/request_sender.py +++ b/request_sender.py @@ -70,10 +70,10 @@ def send_appointment_request(message_queue_name, _contact_list): if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-25.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-03-23.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 2 + _segment_number = 5 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index 3709d9a..faea1d8 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -72,4 +72,4 @@ DE_PROXY_RES = { # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] PROXY_LIST_FR = [FR_PROXY_MOB_OXY] -MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY] +MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY] From c1cb4b4d2f6045aeb52b8a64531900f43d3b2001 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Fri, 29 Mar 2024 19:38:00 +0100 Subject: [PATCH 11/22] check length of mail list before reading mail --- queue_message/appointmentrequestsender.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index a1bbec6..118c69a 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -172,7 +172,8 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("try to remove success contact from list to retrieve mails") self.list_to_retrieve_mails.remove(con) except Exception as e: - self.logger.info("exception while remove success contact from list to retrieve mails") + self.logger.info( + "exception while remove success contact from list to retrieve mails") print(e) else: can_continue = RequestResult.COOKIES_ERROR @@ -208,7 +209,7 @@ class AppointmentRequestSender(threading.Thread): self.channel.start_consuming() def retreive_invalidate_urls(self): - if not self.already_read_emails: + if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: self.logger.info("will retreive_invalidate_urls") time.sleep(30) _mail_list = MONGO_STORE_MANAGER.get_destination_emails() From 75091c0a6b87fd0df3c635b5b7ad67e2c84c0d61 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Sat, 30 Mar 2024 16:12:09 +0100 Subject: [PATCH 12/22] rename the method --- queue_message/appointmentrequestsender.py | 17 +++++----- workers/link_validator.py | 41 ++++++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 118c69a..bc873bf 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -157,6 +157,7 @@ class AppointmentRequestSender(threading.Thread): rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, m_cm_r=m_cm_r, m_ms_r=m_ms_r) + time.sleep(random.randint(1, 4)) _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, js_le_type_data=js_le_data, old_valid_cookie=_new_cookies) @@ -193,7 +194,7 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) ch.basic_ack(delivery_tag=method.delivery_tag) else: - self.retreive_invalidate_urls() + self.retrieve_invalidate_urls() self.logger.info("empty list") time.sleep(120) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) @@ -208,18 +209,18 @@ class AppointmentRequestSender(threading.Thread): self.listen_to_queue(self.on_message) self.channel.start_consuming() - def retreive_invalidate_urls(self): + def retrieve_invalidate_urls(self): if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: - self.logger.info("will retreive_invalidate_urls") + self.logger.info("will retrieve validate urls") time.sleep(30) _mail_list = MONGO_STORE_MANAGER.get_destination_emails() - _mail_list_filtred = [] + _mail_list_filtered = [] for mail in _mail_list: for _contact in self.list_to_retrieve_mails: if _contact.mail == mail.mail: - _mail_list_filtred.append(mail) - self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtred))) - find_links_to_validate_from_mail_list(_mail_list_filtred) + _mail_list_filtered.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) + find_links_to_validate_from_mail_list(_mail_list_filtered) self.already_read_emails = True else: - self.logger.info("already read emails, will not retreive_invalidate_urls") + self.logger.info("already read emails, will not retrieve validate urls") diff --git a/workers/link_validator.py b/workers/link_validator.py index a5f881c..7f85dda 100644 --- a/workers/link_validator.py +++ b/workers/link_validator.py @@ -1,3 +1,4 @@ +import json import random import threading import time @@ -9,7 +10,7 @@ import requests from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo from models.result_pojo import RequestResult -from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_TEST +from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_TEST, TEST_QUEUE, SHARED_OBJECT from queue_message.appointmentrequestsender import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials from workers.proxies_constants import PROXY_LIST_FR @@ -30,7 +31,6 @@ class LinkValidator(threading.Thread): self.filter_with_ip_country() self.proxy_to_use = proxy_to_use self.limit = limit - # self.cookie_str = 'datadome=~pxdHFAvsQl2rvDrTzhPgCHxu~4TBcePTTE~Cy8Rgol6oMRc11gA02VRp0Z3uEDUszCjacubNu7vbfQCh27gz8RC10u_325pt_gsMmJh1ScGvOofVJiVAbEKvSEUjd82;policy=accepted;app.sig=PhjmDkq_dI49pADppDNKxpLe_G4;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;lang=fr;' def set_up_connection(self): self.connection = pika.BlockingConnection( @@ -42,10 +42,13 @@ class LinkValidator(threading.Thread): self.channel.basic_consume(queue=self.queue_to_listen, auto_ack=False, on_message_callback=callback) self.channel.start_consuming() - def send_request(self, linkPojo: LinkPojo) -> RequestResult: + def send_request(self, linkPojo: LinkPojo, _received_dict = None) -> RequestResult: + _ua = 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36' + if _received_dict is not None: + _ua = _received_dict['ua'] self.cookie.load(self.cookie_str) headers = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'User-Agent': _ua, 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Cache-Control': 'max-age=0', @@ -59,6 +62,8 @@ class LinkValidator(threading.Thread): print(self.proxy_to_use) print("received cookie is " + str(self.cookie_str)) try: + print("will send request with ua {}".format(_ua)) + print("will send request with cookie {}".format(self.cookie_str)) response = requests.get(url=linkPojo.url, headers=headers, verify=False, proxies=self.proxy_to_use, timeout=15) print(response.status_code) @@ -77,7 +82,12 @@ class LinkValidator(threading.Thread): new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" print("will publish to queue {}".format(new_coolies_str)) # upload the cookie to queue - self.cookiesPublisher.publish_body(new_coolies_str) + if _received_dict is not None: + _received_dict['cookiesStr'] = new_coolies_str + print("body in json:{}".format(json.dumps(_received_dict))) + self.cookiesPublisher.publish_body(json.dumps(_received_dict)) + else: + self.cookiesPublisher.publish_body(new_coolies_str) self.cookie_str = new_coolies_str return RequestResult.SUCCESS elif DOUBLE_MESSAGE in _content: @@ -109,15 +119,23 @@ class LinkValidator(threading.Thread): print("message count in queue is {}".format(_message_in_queue_count)) self.link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() self.filter_with_ip_country() - self.cookie_str = body.decode("UTF-8") + _received_object = body.decode("UTF-8") + js_data = None + _received_dict = None + if "glrd" in _received_object: + _received_dict = json.loads(_received_object) + _received_cookies = _received_dict["cookiesStr"] + else: + _received_cookies = _received_object + self.cookie_str = _received_cookies random.shuffle(self.link_to_validate_list) - if len(self.link_to_validate_list) > 0 and _message_in_queue_count > self.limit: + if len(self.link_to_validate_list) > 0 and _message_in_queue_count >= self.limit: print("links number is {}".format(len(self.link_to_validate_list))) for con in self.link_to_validate_list: # if not is_already_sent(con): print(con.email) self.proxy_to_use = random.choice(PROXY_LIST_FR) - can_continue = self.send_request(con) + can_continue = self.send_request(con, _received_dict) if can_continue == RequestResult.BLOCKED: print("cannot continue, blocked, then skip") break @@ -140,9 +158,9 @@ class LinkValidator(threading.Thread): def validate_with_FR_ip(): - _queue_name = REQUEST_DATA_QUEUE + _queue_name = TEST_QUEUE link_list = MONGO_STORE_MANAGER.get_links_to_validate() - cookiesPublisher = CookiesPublisher(queue_name=_queue_name) + cookiesPublisher = CookiesPublisher(queue_name=SHARED_OBJECT) cookiesPublisher.set_up_connection() print("filter links with ip_country") _link_list_to_click = [] @@ -152,10 +170,9 @@ def validate_with_FR_ip(): for _l in _link_list_to_click: print(_l.ip_country) _fr_proxy_to_use = random.choice(PROXY_LIST_FR) - # random.shuffle(_link_list_to_click) receiver = LinkValidator(link_to_validate_list=_link_list_to_click, cookiesPublisher=cookiesPublisher, proxy_to_use=_fr_proxy_to_use, - queue_to_listen=_queue_name, ip_country="FR", limit=50) + queue_to_listen=_queue_name, ip_country="FR", limit=0) print("will connect to queue") receiver.set_up_connection() receiver.listen_to_queue(receiver.on_message) From 10ec643cf4aac189296bfa87dff90902983750f3 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Sat, 30 Mar 2024 16:14:13 +0100 Subject: [PATCH 13/22] use TEST_QUEUE for link validator --- workers/link_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/link_validator.py b/workers/link_validator.py index 7f85dda..4b4e8d3 100644 --- a/workers/link_validator.py +++ b/workers/link_validator.py @@ -160,7 +160,7 @@ class LinkValidator(threading.Thread): def validate_with_FR_ip(): _queue_name = TEST_QUEUE link_list = MONGO_STORE_MANAGER.get_links_to_validate() - cookiesPublisher = CookiesPublisher(queue_name=SHARED_OBJECT) + cookiesPublisher = CookiesPublisher(queue_name=TEST_QUEUE) cookiesPublisher.set_up_connection() print("filter links with ip_country") _link_list_to_click = [] From 8507f8a353f1fa9e886c762790c567d5286cdd31 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Tue, 2 Apr 2024 21:47:44 +0200 Subject: [PATCH 14/22] apply new tz value --- models/jsdata_pojo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 93f29f2..63e668d 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -166,7 +166,7 @@ class JsDataPojo: self.pr = pr self.ars_h = ars_h self.ars_w = ars_w - self.tz = -60 + self.tz = -120 self.str_ss = True self.str_ls = True self.str_idb = True From 7c23745679db9bc2498c072d42f625849825494c Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Wed, 3 Apr 2024 14:50:10 +0200 Subject: [PATCH 15/22] update stcfp --- models/jsdata_pojo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 63e668d..8505910 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -266,7 +266,7 @@ class JsDataPojo: self.bfr = False self.dbov = False self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2" # 4.25.0 - self.stcfp = "dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6ODI2MykKICAgIGF0IG0uYXJncy48Y29tcHV0ZWQ+IChodHRwczovL3d3dy5oZXJtZXMuY29tL2ZyL2ZyL3BvbHlmaWxscy43ZTFjYzMyM2Y3OWMwNjZlLmpzOjE6Mjc5NTEp" # 4.25.0 + self.stcfp = "Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6Nzc0NzkpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWIgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6OTA4NTQpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUzMjI1" # 4.25.0 self.ckwa = True def to_url_encoded_json(self): From 8f3b35f55faeb0dd1e25a8b563caea195096ed1b Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Wed, 3 Apr 2024 16:25:48 +0200 Subject: [PATCH 16/22] break when the get_csrf is blocked --- queue_message/appointmentrequestsender.py | 10 ++++++++-- workers/captcha_result_getter.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index bc873bf..10533c1 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -127,9 +127,15 @@ class AppointmentRequestSender(threading.Thread): for con in self.contact_list: self.logger.info(con.mail) if self.valid_csrf is None: - self.valid_csrf = captchaResultGetter.get_csrf( + csrf_result = captchaResultGetter.get_csrf( proxy_to_use=_proxy_to_use, js_data=js_data, cookie=_received_cookies) + if isinstance(csrf_result, str): + self.valid_csrf = csrf_result + else: + self.logger.info("csrf is {}".format(csrf_result)) + if csrf_result == RequestResult.BLOCKED: + break _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, old_valid_cookie=_received_cookies) if _new_cookies is not None: @@ -181,7 +187,7 @@ class AppointmentRequestSender(threading.Thread): else: can_continue = RequestResult.COOKIES_ERROR if can_continue == RequestResult.BLOCKED: - self.logger.info("cannot continue, valid_csrf is " + str(self.valid_csrf)) + self.logger.info("BLOCKED, valid_csrf is " + str(self.valid_csrf)) break elif can_continue == RequestResult.PROXY_ERROR: self.logger.info("PROXY_ERROR, will not reset valid_csrf") diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index bdf9243..5eefa95 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -6,6 +6,7 @@ import requests from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_pojo import JsDataPojo +from models.result_pojo import RequestResult from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \ retain_only_dataome_cookies from workers.proxies_constants import PROXY_LIST_FR @@ -24,7 +25,7 @@ class CaptchaResultGetter: self.cookie_str = 'datadome=5Nq~NEP_qQSHC0g_lZNnZmEv36J8gVV~rpZ329xmCkTq2~H3meIoXr4h_b988qB2XW5Te7iEGsvq8BzA5KeFupyrZFh4kgrDyl8hT2UymSByKHzAcDaNIBPDsRu2g_KG; Max-Age=31536000; Domain=.hermes.com; Path=/; Secure; SameSite=None' pass - def get_csrf(self, proxy_to_use, js_data: JsDataPojo, cookie: str = None) -> Union[str, None]: + def get_csrf(self, proxy_to_use, js_data: JsDataPojo, cookie: str = None) -> Union[str, RequestResult]: if cookie is not None: headers = {'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': js_data.ua, @@ -49,13 +50,16 @@ class CaptchaResultGetter: timeout=15) print(response.status_code) if response.status_code == 200: - # print(response.text) - print(response.url) + print(response.text) return self.extract_csrf_from_html(response.text) + elif response.status_code == 403: + return RequestResult.BLOCKED else: - return None + print(response.text) + return RequestResult.UNKNOWN except Exception as error: print(error) + return RequestResult.PROXY_ERROR def extract_csrf_from_html(self, html: str) -> Union[str, None]: result = re.findall(r'_csrf" value="[A-Za-z0-9-_]+', html) From cbf88f94f6cd4232046b31a95c041f0cfa79bcb2 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Sat, 6 Apr 2024 09:32:36 +0200 Subject: [PATCH 17/22] break when the get_csrf is blocked --- models/result_pojo.py | 1 + parallel_request_sender.py | 87 ++++++++++ queue_message/parallel_requestsender.py | 221 ++++++++++++++++++++++++ request_sender.py | 9 +- workers/proxies_constants.py | 2 +- 5 files changed, 316 insertions(+), 4 deletions(-) create mode 100644 parallel_request_sender.py create mode 100644 queue_message/parallel_requestsender.py diff --git a/models/result_pojo.py b/models/result_pojo.py index a6ff553..8d62dfe 100644 --- a/models/result_pojo.py +++ b/models/result_pojo.py @@ -7,3 +7,4 @@ class RequestResult(Enum): SUCCESS = "SUCCESS" COOKIES_ERROR = "COOKIES_ERROR" UNKNOWN = "UNKNOWN" + CTRF_ERROR = "CTRF_ERROR" diff --git a/parallel_request_sender.py b/parallel_request_sender.py new file mode 100644 index 0000000..cf6f45c --- /dev/null +++ b/parallel_request_sender.py @@ -0,0 +1,87 @@ +import datetime +import logging +import sys +from threading import Thread + +from db.mongo_manager import MONGO_STORE_MANAGER +from excel_reader import read_contacts +from models.contact_pojo import ContactPojo +from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE +from queue_message.appointmentrequestsender import AppointmentRequestSender +from queue_message.parallel_requestsender import ParallelRequestSender +from utiles import is_time_between +from utils.AppLogging import init_logger +from workers.proxies_constants import MOBILE_PROXY_LIST_FR + +IPFIY = 'http://api.ipify.org' +NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app" + + +def is_already_sent(contact: ContactPojo) -> bool: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + for required_contact in already_sent_contacts: + if contact.mail == required_contact.email: + return True + return False + + +def filter_contacts(_contact_list: list) -> list: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() + _contact_list_to_book = [] + for contact in _contact_list: + _to_add = True + for booked in already_sent_contacts: + if contact.mail == booked.email: + _to_add = False + # 如果已经收到链接了,就不要再请求 + for link_to_validate in _link_to_validate_list: + if contact.mail == link_to_validate.email: + logger.info("{}: link already received".format(contact.mail)) + _to_add = False + if _to_add: + _contact_list_to_book.append(contact) + + return _contact_list_to_book + + +def is_open(): + return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) + + +count = 0 +init_logger() +logger = logging.getLogger() + +logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +def send_appointment_request(message_queue_name, _contact_list): + global count + count = count + 1 + for _contact in _contact_list: + logger.info(_contact) + _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) + _cookiesPublisher.set_up_connection() + receiver = ParallelRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, + queue_name=message_queue_name, just_send=True, + cookiesPublisher=_cookiesPublisher, logger=logger) + print("count is " + str(count)) + receiver.run() + + +if __name__ == '__main__': + contacts_file_path = '~/Desktop/31_03_to_test.xlsx' + _contact_list = read_contacts(contacts_file_path)[0:20] + _contact_list_to_book = filter_contacts(_contact_list) + _segment_number = 1 + logger.info("{} contacts to book".format(len(_contact_list_to_book))) + last_thread = None + for i in range(0, _segment_number): + logger.info("segment is {}".format(i)) + _step = int(len(_contact_list_to_book) / _segment_number) + _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] + _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) + last_thread = _thread1 + _thread1.start() + last_thread.join() diff --git a/queue_message/parallel_requestsender.py b/queue_message/parallel_requestsender.py new file mode 100644 index 0000000..1c5ad23 --- /dev/null +++ b/queue_message/parallel_requestsender.py @@ -0,0 +1,221 @@ +import datetime +import json +import random +import threading +import time +from concurrent.futures.thread import ThreadPoolExecutor + +import pika + +from db.mongo_manager import MONGO_STORE_MANAGER +from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list +from models.ReserveResultPojo import ReserveResultPojo +from models.contact_pojo import ContactPojo +from models.jsdata_le_pojo import JsDataLeTypePojo +from models.jsdata_pojo import JsDataPojo +from models.result_pojo import RequestResult +from queue_message.CookiesPublisher import CookiesPublisher +from queue_message.appointmentrequestsender import filter_contacts, is_open +from utiles import is_time_between +from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER +from workers.sender import Sender + +QUEUE_HOST = "appointment.lpaconsulting.fr" +REQUEST_DATA_QUEUE = 'REQUEST_DATA' +credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') + + +def is_already_sent(contact: ContactPojo) -> bool: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + for required_contact in already_sent_contacts: + if contact.mail == required_contact.email: + return True + return False + + +class ParallelRequestSender(threading.Thread): + def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher, + just_send=False, + queue_name=REQUEST_DATA_QUEUE): + super().__init__() + self.connection = None + self.just_send = just_send + self.logger = logger + self.already_tried_contact_list = [] + self.cookiesPublisher = cookiesPublisher + self.channel = None + self.valid_csrf = None + self.list_to_retrieve_mails = sub_contact_list + self.contact_list = sub_contact_list + self.queue_name = queue_name + self.proxy_to_use_list = proxy_to_use_list + self.already_read_emails = False + + def set_up_connection(self): + self.connection = pika.BlockingConnection( + pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) + self.channel = self.connection.channel() + + def listen_to_queue(self, callback): + self.logger.info("listen to queue {}".format(self.queue_name)) + self.channel.basic_qos(prefetch_count=1) + self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback) + self.channel.start_consuming() + + def send_request(self, _received_cookies, _received_dict, js_data: JsDataPojo, logger, + _contact) -> RequestResult: + _proxy_to_use = self.generate_proxy() + logger.info("send_request for contact: {}, cookies: {}".format(_contact.mail, _received_cookies)) + logger.info("proxy to use is {}".format(_proxy_to_use)) + sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, + proxy_to_use=_proxy_to_use, logger=logger) + # remove already sent contacts + if is_open(): + captchaResultGetter = CaptchaResultGetter() + _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, + old_valid_cookie=_received_cookies) + # self.contact_list = filter_contacts(self.contact_list) + logger.info(_contact.mail) + valid_csrf = captchaResultGetter.get_csrf( + proxy_to_use=_proxy_to_use, js_data=js_data, + cookie=_new_cookies) + if isinstance(valid_csrf, str): + if _new_cookies is not None: + logger.info("new cookie is " + _new_cookies) + # m_s_c = f.scroll + m_s_c = random.randint(0, 3) + m_c_c = random.randint(3, 5) # click count + m_m_c = random.randint(3, 5) # move count + m_cm_r = m_c_c / m_m_c + m_ms_r = random.randint(-1, 1) + + js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], + hc=_received_dict['hc'], + ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], + br_ow=_received_dict['br_ow'], + ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], + pr=_received_dict['pr'], + plg=_received_dict['plg'], br_h=_received_dict['br_h'], + br_w=_received_dict['br_w'], + plu=_received_dict['plu'], vnd=_received_dict['vnd'], + dvm=_received_dict['dvm'], + ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], + rs_h=_received_dict['rs_h'], + rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], + m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, + m_cm_r=m_cm_r, m_ms_r=m_ms_r) + time.sleep(random.randint(1, 4)) + _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, + js_le_type_data=js_le_data, + old_valid_cookie=_new_cookies) + if _new_le_cookies is not None: + # self.logger.info("new le type cookie is " + _new_le_cookies) + sender.cookie_str = _new_le_cookies + time.sleep(random.randint(1, 3)) + self.already_tried_contact_list.append(_contact) + can_continue = sender.send_request(HERMES_REGISTER, js_data, _contact, csrf=valid_csrf) + if can_continue == RequestResult.SUCCESS: + # 让服务器读取成功的约会 + try: + self.logger.info("try to remove success contact from list to retrieve mails") + self.list_to_retrieve_mails.remove(_contact) + except Exception as e: + self.logger.info( + "exception while remove success contact from list to retrieve mails") + print(e) + else: + can_continue = RequestResult.COOKIES_ERROR + else: + can_continue = RequestResult.COOKIES_ERROR + if can_continue == RequestResult.BLOCKED: + self.logger.info("cannot continue, we are blocked " + str(self.valid_csrf)) + elif can_continue == RequestResult.PROXY_ERROR: + self.logger.info("PROXY_ERROR, will not reset valid_csrf") + elif can_continue == RequestResult.COOKIES_ERROR: + self.logger.info("COOKIES_ERROR, will not reset valid_csrf") + else: + self.logger.info("can continue, will reset valid_csrf") + self.valid_csrf = None + return can_continue + else: + return valid_csrf + # return RequestResult.CTRF_ERROR + + def getChTypeJsDataFromDict(self, _received_dict) -> JsDataPojo: + return JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], + ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'], + ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'], + plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'], + plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'], + ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], + rs_h=_received_dict['rs_h'], + rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd']) + + def generate_proxy(self): + _port = random.randint(40001, 49999) + _chosen_proxy = random.choice(self.proxy_to_use_list) + self.logger.info("generated port is {}".format(_port)) + _proxy_to_use = {} + _proxy_to_use["http"] = _chosen_proxy["http"].format(_port) + _proxy_to_use["https"] = _chosen_proxy["https"].format(_port) + return _proxy_to_use + + def on_message(self, ch, method, properties, body): + _message_count = self.cookiesPublisher.message_count() + self.logger.info("message count in queue is {}".format(_message_count)) + # prepare the contact list + if self.just_send: + self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list) + else: + self.contact_list = filter_contacts(self.contact_list) + # remove already booked contacts + random.shuffle(self.contact_list) + _received_object = body.decode("UTF-8") + self.logger.info(f" [x] Received {_received_object}") + step = 5 + _received_dict = json.loads(_received_object) + js_data = self.getChTypeJsDataFromDict(_received_dict) + _received_cookies = _received_dict["cookiesStr"] + if len(self.contact_list) > step: + _sub_list = self.contact_list[0:step] + result = None + for con in _sub_list: + with ThreadPoolExecutor(max_workers=step) as executor: + result = executor.submit(self.send_request, _received_cookies, _received_dict, js_data, self.logger, + con) + self.logger.info("result is: " + str(result.result())) + if result.result() == RequestResult.SUCCESS: + self.logger.info("Success for {}, with cookies{}".format(con.mail, _received_cookies)) + if result.result() == RequestResult.BLOCKED or result.result() == RequestResult.CTRF_ERROR: + ch.basic_ack(delivery_tag=method.delivery_tag) + + else: + ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) + else: + self.retrieve_invalidate_urls() + self.logger.info("empty list") + time.sleep(120) + self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) + ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) + + def run(self): + self.logger.info(threading.currentThread().name + " starts") + self.set_up_connection() + self.listen_to_queue(self.on_message) + self.channel.start_consuming() + + def retrieve_invalidate_urls(self): + if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: + self.logger.info("will retrieve validate urls") + time.sleep(30) + _mail_list = MONGO_STORE_MANAGER.get_destination_emails() + _mail_list_filtered = [] + for mail in _mail_list: + for _contact in self.list_to_retrieve_mails: + if _contact.mail == mail.mail: + _mail_list_filtered.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) + find_links_to_validate_from_mail_list(_mail_list_filtered) + self.already_read_emails = True + else: + self.logger.info("already read emails, will not retrieve validate urls") diff --git a/request_sender.py b/request_sender.py index 69470be..412bb79 100644 --- a/request_sender.py +++ b/request_sender.py @@ -33,7 +33,7 @@ def filter_contacts(_contact_list: list) -> list: for booked in already_sent_contacts: if contact.mail == booked.email: _to_add = False - #如果已经收到链接了,就不要再请求 + # 如果已经收到链接了,就不要再请求 for link_to_validate in _link_to_validate_list: if contact.mail == link_to_validate.email: logger.info("{}: link already received".format(contact.mail)) @@ -69,11 +69,14 @@ def send_appointment_request(message_queue_name, _contact_list): receiver.run() +def start(contact_list_file): + pass + if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-23.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-04-05-2.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 5 + _segment_number = 10 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index faea1d8..949d3f1 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -71,5 +71,5 @@ DE_PROXY_RES = { # PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE] # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] -PROXY_LIST_FR = [FR_PROXY_MOB_OXY] +PROXY_LIST_FR = [FR_PROXY_RES_OXY] MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY] From 5099e174a8579825e7afdbf1c6d13e256af158d0 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Mon, 8 Apr 2024 09:38:16 +0200 Subject: [PATCH 18/22] add scheduler.py --- request_sender.py | 12 +++++++----- scheduler.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 scheduler.py diff --git a/request_sender.py b/request_sender.py index 412bb79..d6e237e 100644 --- a/request_sender.py +++ b/request_sender.py @@ -69,11 +69,9 @@ def send_appointment_request(message_queue_name, _contact_list): receiver.run() -def start(contact_list_file): - pass - -if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-04-05-2.xlsx' +def start_send_requests(): + print("start send requests") + contacts_file_path = '~/Desktop/contact_list_2024-04-06-2.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) _segment_number = 10 @@ -87,3 +85,7 @@ if __name__ == '__main__': last_thread = _thread1 _thread1.start() last_thread.join() + + +if __name__ == '__main__': + start_send_requests() diff --git a/scheduler.py b/scheduler.py new file mode 100644 index 0000000..741e43d --- /dev/null +++ b/scheduler.py @@ -0,0 +1,30 @@ +from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor +from apscheduler.schedulers.blocking import BlockingScheduler + +from request_sender import start_send_requests + + +def start_book_appointment(): + start_send_requests() + + +def start_check_results_job(sched): + sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sat', hour='10', + minute='30', + misfire_grace_time=10, + second='0', timezone='Europe/Paris', max_instances=1, args=[]) + + +def config_and_start_jobs(): + executors = { + 'default': ThreadPoolExecutor(30), + 'processpool': ProcessPoolExecutor(12) + } + sched = BlockingScheduler(executors=executors) + start_check_results_job(sched) + sched.print_jobs() + sched.start() + + +if __name__ == '__main__': + config_and_start_jobs() From 6092fca96c5a01127d65203833be401484e7ecd6 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Mon, 15 Apr 2024 07:54:13 +0200 Subject: [PATCH 19/22] continue book --- queue_message/appointmentrequestsender.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 10533c1..8c0423c 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -73,6 +73,7 @@ class AppointmentRequestSender(threading.Thread): self.channel = None self.valid_csrf = None self.list_to_retrieve_mails = sub_contact_list + self.initial_contact_list = sub_contact_list self.contact_list = sub_contact_list self.queue_name = queue_name self.proxy_to_use_list = proxy_to_use_list @@ -202,7 +203,7 @@ class AppointmentRequestSender(threading.Thread): else: self.retrieve_invalidate_urls() self.logger.info("empty list") - time.sleep(120) + time.sleep(60) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) else: @@ -229,4 +230,11 @@ class AppointmentRequestSender(threading.Thread): find_links_to_validate_from_mail_list(_mail_list_filtered) self.already_read_emails = True else: - self.logger.info("already read emails, will not retrieve validate urls") + self.logger.info("already read emails, is there any contacts to use") + self.logger.info("reset already_tried_contact_list") + self.already_tried_contact_list = [] + self.contact_list = filter_contacts(self.initial_contact_list, self.already_tried_contact_list) + self.logger.info("contact_list size is " + str(len(self.contact_list))) + if len(self.contact_list) > 0: + self.logger.info("set already_read_emails to False") + self.already_read_emails = False From 3614008c8ed1ed1a4a0fe0a4c33f96f359de1ae9 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Tue, 16 Apr 2024 11:48:10 +0200 Subject: [PATCH 20/22] add MORNING_DATA_CACHE constant --- queue_message/CookiesPublisher.py | 1 + 1 file changed, 1 insertion(+) diff --git a/queue_message/CookiesPublisher.py b/queue_message/CookiesPublisher.py index 5b1dcd3..1f4b253 100644 --- a/queue_message/CookiesPublisher.py +++ b/queue_message/CookiesPublisher.py @@ -6,6 +6,7 @@ REQUEST_DATA_QUEUE = 'REQUEST_DATA' REQUEST_DATA_QUEUE_TEST = 'REQUEST_DATA_TEST' TEST_QUEUE = 'TEST_QUEUE' SHARED_OBJECT = 'SHARED_OBJECT' +MORNING_DATA_CACHE = 'MORNING_DATA_CACHE' REQUEST_DATA_QUEUE_DE = 'REQUEST_DATA_DE' REQUEST_DATA_OBJECT = 'REQUEST_DATA_OBJECT' REGISTER_QUEUE = 'REGISTER_QUEUE' From 93de0f87a69ee2cd16d99a5c97f5e425ad73d25a Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Tue, 16 Apr 2024 17:36:25 +0200 Subject: [PATCH 21/22] support tags.js version 4.25.1 --- models/jsdata_le_pojo.py | 3 +++ models/jsdata_pojo.py | 6 +++--- workers/captcha_result_getter.py | 6 +++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/models/jsdata_le_pojo.py b/models/jsdata_le_pojo.py index 52a3a82..1f36255 100644 --- a/models/jsdata_le_pojo.py +++ b/models/jsdata_le_pojo.py @@ -37,6 +37,9 @@ class JsDataLeTypePojo(JsDataPojo): self.m_c_c = m_c_c self.m_cm_r = m_cm_r self.m_ms_r = m_ms_r + self.cfpfe = "RXJyb3I6IENhbm5vdCByZWFkIHByb3BlcnRpZXMgb2YgbnVsbA==" # 4.25.1 + self.stcfp = "ckFsbCAoaHR0cHM6Ly9kLmRpZ2l0YWwuaGVybWVzL3RhZ3MuanM6Mjo3NjQ3MykKICAgIGF0IEhUTUxEb2N1bWVudC5yIChodHRwczovL3JlbmRlenZvdXNwYXJpcy5oZXJtZXMuY29tL2Rpc3QvdmVuZG9yLWM2Mjk1ZTljMjg3ZGExNzYwODllLmpzOjI6ODg2Nzkp" # 4.25.1 + test_data_json = """{"glvd": "Qualcomm", "glrd": "Adreno (TM) 630", "hc": 2, "br_oh": 663, "br_ow": 384, "br_h": 663, diff --git a/models/jsdata_pojo.py b/models/jsdata_pojo.py index 8505910..294dfd0 100644 --- a/models/jsdata_pojo.py +++ b/models/jsdata_pojo.py @@ -236,8 +236,8 @@ class JsDataPojo: self.vc1ts = True self.dvm = dvm self.set_default_values() - self.glrd = glrd self.glvd = glvd + self.glrd = glrd self.wwl = False self.jset = int(time.time()) @@ -265,8 +265,8 @@ class JsDataPojo: self.emt = False self.bfr = False self.dbov = False - self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2" # 4.25.0 - self.stcfp = "Oi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6Nzc0NzkpCiAgICBhdCB0LmV4cG9ydHMuZGRfYWIgKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6OTA4NTQpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUzMjI1" # 4.25.0 + self.cfpfe = "ZnVuY3Rpb24oKXt2YXIgdD1kb2N1bWVudFsnXHg3MVx4NzVceDY1XHg3Mlx4NzlceDUzXHg2NVx4NmNceDY1XHg2M1x4NzRceDZmXHg3MiddKCdceDYyXHg3Mlx4NmZceDc3XHg3M1x4NjVceDcyXHg2Nlx4NmNceDZmXHg3N1x4MmRceDYzXHg2Zlx4NmVceDc0XHg2" # 4.25.1 + self.stcfp = "aWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6NzY0NzMpCiAgICBhdCA8Y29tcHV0ZWQ+IFthcyBkZF9hYl0gKGh0dHBzOi8vZC5kaWdpdGFsLmhlcm1lcy90YWdzLmpzOjI6OTAxMTgpCiAgICBhdCBodHRwczovL2QuZGlnaXRhbC5oZXJtZXMvdGFncy5qczoyOjUyMjY2" # 4.25.1 self.ckwa = True def to_url_encoded_json(self): diff --git a/workers/captcha_result_getter.py b/workers/captcha_result_getter.py index 5eefa95..7581828 100644 --- a/workers/captcha_result_getter.py +++ b/workers/captcha_result_getter.py @@ -105,10 +105,10 @@ class CaptchaResultGetter: return None def get_ch_raw_data_from_js_data(self, js_data: JsDataPojo, old_valid_cookie) -> str: - _tag_version = "4.25.0" + _tag_version = "4.25.1" _raw_data = "jsData={}&eventCounters=%5B%5D&jsType=ch&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv={}".format( js_data.to_url_encoded_json(), old_valid_cookie, _tag_version) - print("raw data is " + _raw_data) + print("raw ch data is " + _raw_data) return _raw_data def get_valid_ch_cookie(self, proxy_to_use, js_data: JsDataPojo, old_valid_cookie: str = None) -> Union[str, None]: @@ -172,7 +172,7 @@ class CaptchaResultGetter: # _le_js_raw_data = self.get_le_raw_data_from_js_data(js_le_type_data=js_le_type_data, # old_valid_cookie=old_valid_cookie) _cid = get_datadome_cookies(old_valid_cookie) - _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.25.0".format( + _raw_data = "jsData={}&eventCounters=%7B%22mousemove%22%3A{}%2C%22click%22%3A{}%2C%22scroll%22%3A{}%2C%22touchstart%22%3A{}%2C%22touchend%22%3A{}%2C%22touchmove%22%3A{}%2C%22keydown%22%3A{}%2C%22keyup%22%3A{}%7D&jsType=le&cid={}&ddk=789361B674144528D0B7EE76B35826&Referer=https%253A%252F%252Frendezvousparis.hermes.com%252Fclient%252Fregister&request=%252Fclient%252Fregister&responsePage=origin&ddv=4.25.1".format( js_le_type_data.to_url_encoded_json(), mousemove_count, click_count, scroll_count, touch_count, touch_count, touch_move, key_count, From 32c9d002b351a933a89776ae9bbce9985a553fa2 Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Wed, 17 Apr 2024 15:09:59 +0200 Subject: [PATCH 22/22] support tags.js version 4.25.1 --- request_sender.py | 2 +- scheduler_test.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 scheduler_test.py diff --git a/request_sender.py b/request_sender.py index d6e237e..cb36878 100644 --- a/request_sender.py +++ b/request_sender.py @@ -71,7 +71,7 @@ def send_appointment_request(message_queue_name, _contact_list): def start_send_requests(): print("start send requests") - contacts_file_path = '~/Desktop/contact_list_2024-04-06-2.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-04-16_2.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) _segment_number = 10 diff --git a/scheduler_test.py b/scheduler_test.py new file mode 100644 index 0000000..28144d7 --- /dev/null +++ b/scheduler_test.py @@ -0,0 +1,30 @@ +from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor +from apscheduler.schedulers.blocking import BlockingScheduler + +from request_sender_test import start_send_requests + + +def start_book_appointment(): + start_send_requests() + + +def start_check_results_job(sched): + sched.add_job(start_book_appointment, 'cron', day_of_week='mon-sat', hour='10', + minute='30', + misfire_grace_time=10, + second='0', timezone='Europe/Paris', max_instances=1, args=[]) + + +def config_and_start_jobs(): + executors = { + 'default': ThreadPoolExecutor(30), + 'processpool': ProcessPoolExecutor(12) + } + sched = BlockingScheduler(executors=executors) + start_check_results_job(sched) + sched.print_jobs() + sched.start() + + +if __name__ == '__main__': + config_and_start_jobs()