diff --git a/db/mongo_manager.py b/db/mongo_manager.py index ed4c2f6..dc19c6e 100755 --- a/db/mongo_manager.py +++ b/db/mongo_manager.py @@ -1,5 +1,6 @@ import datetime import logging +import time from pymongo import MongoClient @@ -82,5 +83,29 @@ class MongoDbManager: self.logger.info(error) return link_list + def link_validated_for_result(self, link: str, linkPojo: LinkPojo, state=True, is_duplicated=False): + print("link_validated_for_result() called with url = " + link) + if is_duplicated: + _id = link.split("/")[-2] + else: + _id = link.split("/")[-1] + print("link_validated_for_result() called with id = " + _id) + + collection_name = str(datetime.date.today()) + print("link_validated_for_result() called with collection_name = " + collection_name) + + collection = self.db[collection_name] + validated_at = time.strftime("%H:%M:%S", time.localtime()) + validated_by = "requests" + if is_duplicated: + validated_by = "Double" + collection.find_one_and_update({'_id': _id}, { + "$set": {"url_validated": state, "validated_at": validated_at, "id": _id, "email": linkPojo.email, + "validated_by": validated_by}}, + upsert=True) + # remove the link from db + collection_to_use = self.db[LINKS_TO_VALIDATE] + collection_to_use.delete_one({'_id': linkPojo.email}) + MONGO_STORE_MANAGER = MongoDbManager() diff --git a/queue_message/CookiesPublisher.py b/queue_message/CookiesPublisher.py new file mode 100644 index 0000000..5c9bea6 --- /dev/null +++ b/queue_message/CookiesPublisher.py @@ -0,0 +1,21 @@ +import pika + +QUEUE_HOST = "appointment.lpaconsulting.fr" +REQUEST_DATA_QUEUE = 'REQUEST_DATA' +credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') + + +class CookiesPublisher: + + def __init__(self): + self.connection = None + self.channel = None + + def set_up_connection(self): + self.connection = pika.BlockingConnection( + pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) + self.channel = self.connection.channel() + # self.channel.queue_declare(queue=REQUEST_DATA_QUEUE, durable=True) + + def publish_body(self, body: str): + self.channel.basic_publish(exchange='', routing_key=REQUEST_DATA_QUEUE, body=body) diff --git a/queue_message/link_validator_receiver.py b/queue_message/link_validator_receiver.py index 11494f0..66abdef 100644 --- a/queue_message/link_validator_receiver.py +++ b/queue_message/link_validator_receiver.py @@ -67,8 +67,9 @@ def get_valid_csrf() -> str: class LinkValidatorReceiver(threading.Thread): - def __init__(self): + def __init__(self, linkpojo_list: list): self.connection = None + self.linkpojo_list = linkpojo_list self.channel = None def set_up_connection(self): @@ -83,41 +84,42 @@ class LinkValidatorReceiver(threading.Thread): def on_message(self, ch, method, properties, body): print(f" [x] Received {body}") - link_list = MONGO_STORE_MANAGER.get_links_to_validate() # remove already booked contacts - random.shuffle(link_list) - link = random.choice(link_list) - _link_validator = LinkValidator(body.decode("UTF-8"), link_to_validate=link) - _link_validator.send_request() - - # if len(self.contact_list) > 0: - # print("contact number is {}".format(len(self.contact_list))) - # for con in self.contact_list: - # sender = Sender(body.decode("UTF-8")) - # # if not is_already_sent(con): - # print(con.mail) - # # time.sleep(random.randint(1, 5)) - # if self.valid_csrf is None: - # self.valid_csrf = get_valid_csrf() - # can_continue = sender.send_request(HERMES_REGISTER, con, csrf=self.valid_csrf) - # if not can_continue: - # print("cannot continue, valid_csrf is " + self.valid_csrf) - # break - # else: - # print("can continue, will reset valid_csrf") - # self.valid_csrf = None - # # else: - # # print(con.mail + "--> skip") - # ch.basic_ack(delivery_tag=method.delivery_tag) - # else: - # print("empty list") + # random.shuffle(link_list) + # link = random.choice(link_list) + # _link_validator = LinkValidator(body.decode("UTF-8"), link_to_validate=link) + # _link_validator.send_request() + # + # print(f" [x] Received {body}") + # sender = Sender(body.decode("UTF-8")) + # remove already booked contacts + random.shuffle(self.linkpojo_list) + if len(self.linkpojo_list) > 0: + print("contact number is {}".format(len(self.linkpojo_list))) + for con in self.linkpojo_list: + # if not is_already_sent(con): + print(con.email) + can_continue = self.send_request(HERMES_REGISTER, con, csrf=self.valid_csrf) + if not can_continue: + print("cannot continue, valid_csrf is " + self.valid_csrf) + break + else: + print("can continue, will reset valid_csrf") + self.valid_csrf = None + # else: + # print(con.mail + "--> skip") + ch.basic_ack(delivery_tag=method.delivery_tag) + else: + print("empty list") def run(self): print(threading.currentThread().name + " starts") self.set_up_connection() self.listen_to_queue(self.on_message) self.channel.start_consuming() -# if __name__ == '__main__': -# receiver = Receiver() -# receiver.set_up_connection() -# receiver.listen_to_queue(on_message) + + +if __name__ == '__main__': + link_list = MONGO_STORE_MANAGER.get_links_to_validate() + receiver = LinkValidatorReceiver(link_list) + receiver.run() diff --git a/queue_message/receiver.py b/queue_message/receiver.py index 428cbfd..9d43d32 100644 --- a/queue_message/receiver.py +++ b/queue_message/receiver.py @@ -7,6 +7,7 @@ import pika from db.mongo_manager import MONGO_STORE_MANAGER from models.contact_pojo import ContactPojo +from queue_message.CookiesPublisher import CookiesPublisher from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER from workers.sender import Sender @@ -66,8 +67,10 @@ def get_valid_csrf() -> str: class Receiver(threading.Thread): - def __init__(self, sub_contact_list: list): + def __init__(self, sub_contact_list: list, cookiesPublisher: CookiesPublisher): + super().__init__() self.connection = None + self.cookiesPublisher = cookiesPublisher self.channel = None self.valid_csrf = None self.contact_list = sub_contact_list @@ -84,21 +87,22 @@ class Receiver(threading.Thread): def on_message(self, ch, method, properties, body): print(f" [x] Received {body}") - sender = Sender(body.decode("UTF-8")) + sender = Sender(body.decode("UTF-8"), cookiesPublisher=self.cookiesPublisher) self.contact_list = filter_contacts(self.contact_list) # remove already booked contacts random.shuffle(self.contact_list) if len(self.contact_list) > 0: + captchaResultGetter = CaptchaResultGetter() print("contact number is {}".format(len(self.contact_list))) for con in self.contact_list: # if not is_already_sent(con): print(con.mail) # time.sleep(random.randint(1, 5)) if self.valid_csrf is None: - self.valid_csrf = get_valid_csrf() + self.valid_csrf = captchaResultGetter.get_csrf(body.decode("UTF-8")) can_continue = sender.send_request(HERMES_REGISTER, con, csrf=self.valid_csrf) if not can_continue: - print("cannot continue, valid_csrf is " + self.valid_csrf) + print("cannot continue, valid_csrf is " + str(self.valid_csrf)) break else: print("can continue, will reset valid_csrf") diff --git a/request_sender.py b/request_sender.py index 88cda0d..2fb4983 100644 --- a/request_sender.py +++ b/request_sender.py @@ -1,13 +1,14 @@ +import datetime import random import time -from concurrent.futures import as_completed -from concurrent.futures.thread import ThreadPoolExecutor from http.cookies import SimpleCookie from db.mongo_manager import MONGO_STORE_MANAGER from excel_reader import read_contacts from models.contact_pojo import ContactPojo -from queue_message.receiver import Receiver, filter_contacts +from queue_message.CookiesPublisher import CookiesPublisher +from queue_message.receiver import Receiver +from utiles import is_time_between from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER from workers.sender import Sender @@ -15,52 +16,63 @@ IPFIY = 'http://api.ipify.org' NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app" -def handle_cookie(cookie: str): - contact_list = read_contacts('/Users/panlei/Desktop/real_name_contacts_77_14_01_2024.xlsx') - print(f" [x] Received {cookie}") - sender = Sender(cookie) - contact_list = filter_contacts(contact_list) - # remove already booked contacts - random.shuffle(contact_list) - if len(contact_list) > 0: - print("contact number is {}".format(len(contact_list))) - for con in contact_list: - # if not is_already_sent(con): - print(con.mail) - # time.sleep(random.randint(1, 5)) - # if self.valid_csrf is None: - # self.valid_csrf = get_valid_csrf() - can_continue = sender.send_request(HERMES_REGISTER, con) - if not can_continue: - print("cannot continue, valid_csrf is ") - break - else: - print("can continue, will reset valid_csrf") - # else: - # print(con.mail + "--> skip") - else: - print("empty list") +def is_already_sent(contact: ContactPojo) -> bool: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + for required_contact in already_sent_contacts: + if contact.mail == required_contact.email: + return True + return False + + +def filter_contacts(_contact_list: list) -> list: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + _contact_list_to_book = [] + for contact in _contact_list: + _to_add = True + for booked in already_sent_contacts: + if contact.mail == booked.email: + _to_add = False + if _to_add: + _contact_list_to_book.append(contact) + + return _contact_list_to_book + + +def get_valid_csrf() -> str: + captchaResultGetter = CaptchaResultGetter() + _valid_cookie = captchaResultGetter.get_valid_cookie() + # while _valid_cookie is None: + # _valid_cookie = captchaResultGetter.get_valid_cookie() + new_csrf = None + while new_csrf is None and is_open(): + valid_cookie = None + if _valid_cookie is not None: + simple_cookie = SimpleCookie() + simple_cookie.load(_valid_cookie) + new_cookies = {k: v.value for k, v in simple_cookie.items()} + new_coolies_str = "" + for key in new_cookies: + print(key) + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + print(new_coolies_str) + valid_cookie = new_coolies_str + "app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;policy=accepted;lang=fr;" + print(valid_cookie) + new_csrf = captchaResultGetter.get_csrf(valid_cookie) + if new_csrf is None: + _valid_cookie = None + while _valid_cookie is None: + _valid_cookie = captchaResultGetter.get_valid_cookie() + time.sleep(2) + return new_csrf + + +def is_open(): + return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) if __name__ == '__main__': - captchaResultGetter = CaptchaResultGetter() - ch_valid_cookies = captchaResultGetter.get_valid_ch_cookie() - valid_cookies = captchaResultGetter.get_valid_cookie(old_valid_cookie=ch_valid_cookies) - print(ch_valid_cookies) - valid_cookies = valid_cookies + ";app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoidVdfTVhUbnhmbnBYQVlJTDlKTUtCZDQtIiwiYXBwb2ludG1lbnRfY29kZSI6IlRGOTlOVSIsImJsb2NrX3JlZ2lzdHJhdGlvbiI6ZmFsc2V9;policy=accepted;lang=fr;app.sig=JzJN6KT0vNudm29rf3toN_Cx8wI;" - valid_cookies = valid_cookies.replace(" ", "").replace("Path=/;", "").replace("Secure;", "").replace( - "SameSite=None;", "").replace("Domain=.hermes.com;", "").replace("Max-Age=31536000;", "") - print(valid_cookies) - - handle_cookie(valid_cookies) - # contact_list = read_contacts('/Users/panlei/Desktop/yahoo_aol_26_3_valid.xlsx') - # contact_list = filter_contacts(contact_list) - # index = int(len(contact_list) / 2) - # first_half = contact_list[:index] - # second_half = contact_list[index:] - # to_book = [first_half, second_half] - # for li in to_book: - # receiver = Receiver(contact_list) - # receiver.run() - # receiver.set_up_connection() - # receiver.listen_to_queue(receiver.on_message) + cookiesPublisher = CookiesPublisher() + cookiesPublisher.set_up_connection() + contact_list = read_contacts('/Users/panlei/Desktop/real_name_contacts_77_14_01_2024.xlsx') + receiver = Receiver(sub_contact_list=contact_list, cookiesPublisher=cookiesPublisher) + receiver.run() diff --git a/utiles.py b/utiles.py new file mode 100644 index 0000000..39e6199 --- /dev/null +++ b/utiles.py @@ -0,0 +1,14 @@ +from datetime import datetime, time + + +def is_time_between(begin_time, end_time, check_time=None): + # If check time is not given, default to current UTC time + check_time = check_time or datetime.now().time() + if begin_time < end_time: + return begin_time <= check_time <= end_time + else: # crosses midnight + return check_time >= begin_time or check_time <= end_time + + +if __name__ == '__main__': + print(is_time_between(time(00, 00), time(16, 30))) diff --git a/workers/link_validator.py b/workers/link_validator.py index 72e9d90..ec70568 100644 --- a/workers/link_validator.py +++ b/workers/link_validator.py @@ -1,27 +1,45 @@ import random +import threading +import time from http.cookies import SimpleCookie + +import pika import requests +from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo +from queue_message.CookiesPublisher import CookiesPublisher +from queue_message.receiver import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials from workers.proxies_constants import PROXY_LIST -class LinkValidator: +class LinkValidator(threading.Thread): - def __init__(self, cookie_str, link_to_validate: LinkPojo): + def __init__(self, link_to_validate_list: list, cookiesPublisher: CookiesPublisher): + super().__init__() self.cookie = SimpleCookie() - self.link = link_to_validate.url + self.cookiesPublisher = cookiesPublisher + self.link_to_validate_list = link_to_validate_list # self.cookie_str = 'datadome=~pxdHFAvsQl2rvDrTzhPgCHxu~4TBcePTTE~Cy8Rgol6oMRc11gA02VRp0Z3uEDUszCjacubNu7vbfQCh27gz8RC10u_325pt_gsMmJh1ScGvOofVJiVAbEKvSEUjd82;policy=accepted;app.sig=PhjmDkq_dI49pADppDNKxpLe_G4;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;lang=fr;' - self.cookie_str = cookie_str - self.cookie.load(self.cookie_str) - def send_request(self): + def set_up_connection(self): + self.connection = pika.BlockingConnection( + pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) + self.channel = self.connection.channel() + + def listen_to_queue(self, callback): + self.channel.basic_qos(prefetch_count=1) + self.channel.basic_consume(queue=REQUEST_DATA_QUEUE, auto_ack=False, on_message_callback=callback) + self.channel.start_consuming() + + def send_request(self, linkPojo: LinkPojo) -> bool: + self.cookie.load(self.cookie_str) headers = { 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Cache-Control': 'max-age=0', - 'Referer': self.link, + 'Referer': linkPojo.url, 'Cookie': self.cookie_str, 'Sec-Fetch-Mode': 'navigate', 'Host': 'rendezvousparis.hermes.com', @@ -32,20 +50,58 @@ class LinkValidator: print(proxy_to_use) print("received cookie is " + str(self.cookie_str)) try: - response = requests.get(url=self.link, headers=headers, verify=False, proxies=proxy_to_use, + response = requests.get(url=linkPojo.url, headers=headers, verify=False, proxies=proxy_to_use, timeout=15) print(response.status_code) if response.status_code == 200: print(response.text) print(response.url) + MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo) + # set new cookies + _cookies_to_set = response.headers['set-cookie'] + self.cookie.load(_cookies_to_set) + new_cookies = {k: v.value for k, v in self.cookie.items()} + new_coolies_str = "" + for key in new_cookies: + new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" + print("will publish to queue {}".format(new_coolies_str)) + # upload the cookie to queue + self.cookiesPublisher.publish_body(new_coolies_str) + self.cookie_str = new_coolies_str + return True else: - return None + return False except Exception as error: print(error) + return False + + def on_message(self, ch, method, properties, body): + print(f" [x] Received {body}") + self.link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() + self.cookie_str = body.decode("UTF-8") + random.shuffle(self.link_to_validate_list) + if len(self.link_to_validate_list) > 0: + print("links number is {}".format(len(self.link_to_validate_list))) + for con in self.link_to_validate_list: + # if not is_already_sent(con): + print(con.email) + can_continue = self.send_request(con) + if not can_continue: + print("cannot continue, skip") + break + else: + time.sleep(random.randint(2, 5)) + print("can continue, continue") + print("will ack") + ch.basic_ack(delivery_tag=method.delivery_tag) + else: + print("empty list") if __name__ == '__main__': - link_validator = LinkValidator( - cookie_str="W3BqZ5LSU5~Yl1LAd68HLGCuSELSQzuyPC1xuM0MpMLwX13p0HxwYL4mxM6BgmtLe0TwMyJKRnOdZgMW2CzGW0h53oQDNioqXAt0GbiDbiY5R_AaUajk_O7MtDqPwFCB;policy=accepted;app.sig=C3MpoPl8yFlKfm6UIrgnxYqs8Hs;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiV3Z6QWJGX09jZXpQREFnZW9PcnM4eXNIIn0=;lang=fr;", - link_to_validate="https://rendezvousparis.hermes.com/client/register/55TCMY/validate?code=011367") - link_validator.send_request() + link_list = MONGO_STORE_MANAGER.get_links_to_validate() + cookiesPublisher = CookiesPublisher() + cookiesPublisher.set_up_connection() + receiver = LinkValidator(link_to_validate_list=link_list, cookiesPublisher=cookiesPublisher) + receiver.set_up_connection() + receiver.listen_to_queue(receiver.on_message) diff --git a/workers/sender.py b/workers/sender.py index 3a3a569..0e10a9c 100644 --- a/workers/sender.py +++ b/workers/sender.py @@ -7,16 +7,19 @@ import requests from db.mongo_manager import MONGO_STORE_MANAGER from models.ReserveResultPojo import ReserveResultPojo, PublishType +from queue_message.CookiesPublisher import CookiesPublisher from workers.proxies_constants import PROXY_LIST class Sender: - def __init__(self, cookie_str): + def __init__(self, cookie_str, cookiesPublisher: CookiesPublisher): self.store_type = "random" self.cookie = SimpleCookie() + self.cookiesPublisher = cookiesPublisher # self.cookie_str = 'datadome=~pxdHFAvsQl2rvDrTzhPgCHxu~4TBcePTTE~Cy8Rgol6oMRc11gA02VRp0Z3uEDUszCjacubNu7vbfQCh27gz8RC10u_325pt_gsMmJh1ScGvOofVJiVAbEKvSEUjd82;policy=accepted;app.sig=PhjmDkq_dI49pADppDNKxpLe_G4;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;lang=fr;' self.cookie_str = cookie_str + self._csrf = None self.cookie.load(self.cookie_str) def publish_message_to_queue(self, contact: ContactPojo, status: PublishType, url: str): @@ -41,8 +44,12 @@ class Sender: def send_request(self, url, contact: ContactPojo, csrf: str = None): if csrf is None: - _csrf = '8Bs2dBwb-nHONOzo9Tei2CcMZglEfsRqUz8E' + if self._csrf is None: + _csrf = '8Bs2dBwb-nHONOzo9Tei2CcMZglEfsRqUz8E' + else: + _csrf = self._csrf else: + self._csrf = csrf _csrf = csrf headers = {'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', @@ -74,7 +81,9 @@ class Sender: new_coolies_str = "" for key in new_cookies: new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" - print(new_coolies_str) + print("will publish to queue {}".format(new_coolies_str)) + # upload the cookie to queue + self.cookiesPublisher.publish_body(new_coolies_str) self.cookie_str = new_coolies_str return True else: