From 75091c0a6b87fd0df3c635b5b7ad67e2c84c0d61 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Sat, 30 Mar 2024 16:12:09 +0100 Subject: [PATCH] rename the method --- queue_message/appointmentrequestsender.py | 17 +++++----- workers/link_validator.py | 41 ++++++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 118c69a..bc873bf 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -157,6 +157,7 @@ class AppointmentRequestSender(threading.Thread): rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, m_cm_r=m_cm_r, m_ms_r=m_ms_r) + time.sleep(random.randint(1, 4)) _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, js_le_type_data=js_le_data, old_valid_cookie=_new_cookies) @@ -193,7 +194,7 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) ch.basic_ack(delivery_tag=method.delivery_tag) else: - self.retreive_invalidate_urls() + self.retrieve_invalidate_urls() self.logger.info("empty list") time.sleep(120) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) @@ -208,18 +209,18 @@ class AppointmentRequestSender(threading.Thread): self.listen_to_queue(self.on_message) self.channel.start_consuming() - def retreive_invalidate_urls(self): + def retrieve_invalidate_urls(self): if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: - self.logger.info("will retreive_invalidate_urls") + self.logger.info("will retrieve validate urls") time.sleep(30) _mail_list = MONGO_STORE_MANAGER.get_destination_emails() - _mail_list_filtred = [] + _mail_list_filtered = [] for mail in _mail_list: for _contact in self.list_to_retrieve_mails: if _contact.mail == mail.mail: - _mail_list_filtred.append(mail) - self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtred))) - find_links_to_validate_from_mail_list(_mail_list_filtred) + _mail_list_filtered.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) + find_links_to_validate_from_mail_list(_mail_list_filtered) self.already_read_emails = True else: - self.logger.info("already read emails, will not retreive_invalidate_urls") + self.logger.info("already read emails, will not retrieve validate urls") diff --git a/workers/link_validator.py b/workers/link_validator.py index a5f881c..7f85dda 100644 --- a/workers/link_validator.py +++ b/workers/link_validator.py @@ -1,3 +1,4 @@ +import json import random import threading import time @@ -9,7 +10,7 @@ import requests from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo from models.result_pojo import RequestResult -from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_TEST +from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_TEST, TEST_QUEUE, SHARED_OBJECT from queue_message.appointmentrequestsender import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials from workers.proxies_constants import PROXY_LIST_FR @@ -30,7 +31,6 @@ class LinkValidator(threading.Thread): self.filter_with_ip_country() self.proxy_to_use = proxy_to_use self.limit = limit - # self.cookie_str = 'datadome=~pxdHFAvsQl2rvDrTzhPgCHxu~4TBcePTTE~Cy8Rgol6oMRc11gA02VRp0Z3uEDUszCjacubNu7vbfQCh27gz8RC10u_325pt_gsMmJh1ScGvOofVJiVAbEKvSEUjd82;policy=accepted;app.sig=PhjmDkq_dI49pADppDNKxpLe_G4;app=eyJmbGFzaCI6e30sImNhY2hlZmxhc2giOltdLCJjc3JmU2VjcmV0IjoiYnRodHNYU1lvdnl4RzVGakpGRDZsQ0JtIn0=;lang=fr;' def set_up_connection(self): self.connection = pika.BlockingConnection( @@ -42,10 +42,13 @@ class LinkValidator(threading.Thread): self.channel.basic_consume(queue=self.queue_to_listen, auto_ack=False, on_message_callback=callback) self.channel.start_consuming() - def send_request(self, linkPojo: LinkPojo) -> RequestResult: + def send_request(self, linkPojo: LinkPojo, _received_dict = None) -> RequestResult: + _ua = 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36' + if _received_dict is not None: + _ua = _received_dict['ua'] self.cookie.load(self.cookie_str) headers = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36', + 'User-Agent': _ua, 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br', 'Cache-Control': 'max-age=0', @@ -59,6 +62,8 @@ class LinkValidator(threading.Thread): print(self.proxy_to_use) print("received cookie is " + str(self.cookie_str)) try: + print("will send request with ua {}".format(_ua)) + print("will send request with cookie {}".format(self.cookie_str)) response = requests.get(url=linkPojo.url, headers=headers, verify=False, proxies=self.proxy_to_use, timeout=15) print(response.status_code) @@ -77,7 +82,12 @@ class LinkValidator(threading.Thread): new_coolies_str = new_coolies_str + key + "=" + new_cookies[key] + ";" print("will publish to queue {}".format(new_coolies_str)) # upload the cookie to queue - self.cookiesPublisher.publish_body(new_coolies_str) + if _received_dict is not None: + _received_dict['cookiesStr'] = new_coolies_str + print("body in json:{}".format(json.dumps(_received_dict))) + self.cookiesPublisher.publish_body(json.dumps(_received_dict)) + else: + self.cookiesPublisher.publish_body(new_coolies_str) self.cookie_str = new_coolies_str return RequestResult.SUCCESS elif DOUBLE_MESSAGE in _content: @@ -109,15 +119,23 @@ class LinkValidator(threading.Thread): print("message count in queue is {}".format(_message_in_queue_count)) self.link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() self.filter_with_ip_country() - self.cookie_str = body.decode("UTF-8") + _received_object = body.decode("UTF-8") + js_data = None + _received_dict = None + if "glrd" in _received_object: + _received_dict = json.loads(_received_object) + _received_cookies = _received_dict["cookiesStr"] + else: + _received_cookies = _received_object + self.cookie_str = _received_cookies random.shuffle(self.link_to_validate_list) - if len(self.link_to_validate_list) > 0 and _message_in_queue_count > self.limit: + if len(self.link_to_validate_list) > 0 and _message_in_queue_count >= self.limit: print("links number is {}".format(len(self.link_to_validate_list))) for con in self.link_to_validate_list: # if not is_already_sent(con): print(con.email) self.proxy_to_use = random.choice(PROXY_LIST_FR) - can_continue = self.send_request(con) + can_continue = self.send_request(con, _received_dict) if can_continue == RequestResult.BLOCKED: print("cannot continue, blocked, then skip") break @@ -140,9 +158,9 @@ class LinkValidator(threading.Thread): def validate_with_FR_ip(): - _queue_name = REQUEST_DATA_QUEUE + _queue_name = TEST_QUEUE link_list = MONGO_STORE_MANAGER.get_links_to_validate() - cookiesPublisher = CookiesPublisher(queue_name=_queue_name) + cookiesPublisher = CookiesPublisher(queue_name=SHARED_OBJECT) cookiesPublisher.set_up_connection() print("filter links with ip_country") _link_list_to_click = [] @@ -152,10 +170,9 @@ def validate_with_FR_ip(): for _l in _link_list_to_click: print(_l.ip_country) _fr_proxy_to_use = random.choice(PROXY_LIST_FR) - # random.shuffle(_link_list_to_click) receiver = LinkValidator(link_to_validate_list=_link_list_to_click, cookiesPublisher=cookiesPublisher, proxy_to_use=_fr_proxy_to_use, - queue_to_listen=_queue_name, ip_country="FR", limit=50) + queue_to_listen=_queue_name, ip_country="FR", limit=0) print("will connect to queue") receiver.set_up_connection() receiver.listen_to_queue(receiver.on_message)