Merge branch 'refs/heads/master' into feature/4_32_1

This commit is contained in:
2024-07-21 20:19:02 +02:00
3 changed files with 26 additions and 6 deletions
+1
View File
@@ -3,6 +3,7 @@ from enum import Enum
class RequestResult(Enum): class RequestResult(Enum):
BLOCKED = "BLOCKED" BLOCKED = "BLOCKED"
CSRF_BLOCKED = "CSRF_BLOCKED"
PROXY_ERROR = "PROXY_ERROR" PROXY_ERROR = "PROXY_ERROR"
SUCCESS = "SUCCESS" SUCCESS = "SUCCESS"
COOKIES_ERROR = "COOKIES_ERROR" COOKIES_ERROR = "COOKIES_ERROR"
+17 -1
View File
@@ -64,12 +64,14 @@ def is_open():
class AppointmentRequestSender(threading.Thread): class AppointmentRequestSender(threading.Thread):
def __init__(self, sub_contact_list: list, logger, cookiesPublisher: CookiesPublisher, def __init__(self, sub_contact_list: list, logger, cookiesPublisher: CookiesPublisher,
bakeUpCookiesPublisher: CookiesPublisher,
queue_name=REQUEST_DATA_QUEUE): queue_name=REQUEST_DATA_QUEUE):
super().__init__() super().__init__()
self.connection = None self.connection = None
self.logger = logger self.logger = logger
self.already_tried_contact_list = [] self.already_tried_contact_list = []
self.cookiesPublisher = cookiesPublisher self.cookiesPublisher = cookiesPublisher
self.bakeUpCookiesPublisher = bakeUpCookiesPublisher
self.channel = None self.channel = None
self.valid_csrf = None self.valid_csrf = None
self.list_to_retrieve_mails = sub_contact_list self.list_to_retrieve_mails = sub_contact_list
@@ -117,6 +119,7 @@ class AppointmentRequestSender(threading.Thread):
captchaResultGetter = CaptchaResultGetter() captchaResultGetter = CaptchaResultGetter()
self.logger.info("contact number is {}".format(len(self.contact_list))) self.logger.info("contact number is {}".format(len(self.contact_list)))
# self.contact_list = filter_contacts(self.contact_list) # self.contact_list = filter_contacts(self.contact_list)
can_continue = None
for con in self.contact_list: for con in self.contact_list:
# _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() # _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request()
# print(_proxy_to_use) # print(_proxy_to_use)
@@ -133,6 +136,7 @@ class AppointmentRequestSender(threading.Thread):
else: else:
self.logger.info("csrf is {}".format(csrf_result)) self.logger.info("csrf is {}".format(csrf_result))
if csrf_result == RequestResult.BLOCKED: if csrf_result == RequestResult.BLOCKED:
can_continue = RequestResult.CSRF_BLOCKED
break break
_new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data,
old_valid_cookie=_received_cookies) old_valid_cookie=_received_cookies)
@@ -200,11 +204,23 @@ class AppointmentRequestSender(threading.Thread):
self.valid_csrf = None self.valid_csrf = None
time.sleep(random.randint(1, 2)) time.sleep(random.randint(1, 2))
self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag))
if can_continue is not None and can_continue == RequestResult.CSRF_BLOCKED:
# If the CSRF request is blocked while sending requests, republish the cookie to the current queue, because this cookie may still be reusable
self.logger.info("csrf blocked, will republish cookie")
self.cookiesPublisher.publish_body(_received_object)
self.logger.info("csrf blocked, will wait 60 seconds")
time.sleep(60)
ch.basic_ack(delivery_tag=method.delivery_tag)
elif can_continue is not None and can_continue == RequestResult.BLOCKED:
self.logger.info("这个cookies可以给点链接用")
self.bakeUpCookiesPublisher.publish_body(_received_object)
ch.basic_ack(delivery_tag=method.delivery_tag)
else:
ch.basic_ack(delivery_tag=method.delivery_tag) ch.basic_ack(delivery_tag=method.delivery_tag)
else: else:
self.retrieve_invalidate_urls() self.retrieve_invalidate_urls()
self.logger.info("empty list") self.logger.info("empty list")
time.sleep(30) time.sleep(10)
self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag))
ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)
else: else:
+7 -4
View File
@@ -7,7 +7,7 @@ from db.mongo_manager import MONGO_STORE_MANAGER
from excel_reader import read_contacts from excel_reader import read_contacts
from models.contact_pojo import ContactPojo from models.contact_pojo import ContactPojo
from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE, MORNING_DATA_CACHE, \ from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE, MORNING_DATA_CACHE, \
MORNING_DATA_CACHE_2 MORNING_DATA_CACHE_2, MORNING_DATA_CACHE_BAK
from queue_message.appointmentrequestsender import AppointmentRequestSender from queue_message.appointmentrequestsender import AppointmentRequestSender
from utiles import is_time_between from utiles import is_time_between
from utils.AppLogging import init_logger from utils.AppLogging import init_logger
@@ -60,9 +60,12 @@ def send_appointment_request(message_queue_name, _contact_list):
logger.info(_contact) logger.info(_contact)
_cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name)
_cookiesPublisher.set_up_connection() _cookiesPublisher.set_up_connection()
_backUp_cookiesPublisher = CookiesPublisher(queue_name=MORNING_DATA_CACHE_BAK)
_backUp_cookiesPublisher.set_up_connection()
receiver = AppointmentRequestSender(sub_contact_list=_contact_list, receiver = AppointmentRequestSender(sub_contact_list=_contact_list,
queue_name=message_queue_name, queue_name=message_queue_name,
cookiesPublisher=_cookiesPublisher, logger=logger) cookiesPublisher=_cookiesPublisher,
bakeUpCookiesPublisher=_backUp_cookiesPublisher, logger=logger)
print("count is " + str(count)) print("count is " + str(count))
receiver.run() receiver.run()
@@ -99,6 +102,6 @@ if __name__ == '__main__':
# '~/Desktop/contact_list_2024-05-21.xlsx', # '~/Desktop/contact_list_2024-05-21.xlsx',
# '~/Desktop/15_05_to_test.xlsx'] # '~/Desktop/15_05_to_test.xlsx']
# file_list = ['~/Desktop/15_05_to_test.xlsx', '~/Desktop/16_05_to_test.xlsx'] # file_list = ['~/Desktop/15_05_to_test.xlsx', '~/Desktop/16_05_to_test.xlsx']
file_list = ['~/Desktop/contact_list_2024-06-06.xlsx'] file_list = ['~/Desktop/contact_list_2024-06-28.xlsx']
send_request_for_file_list(file_list=file_list, thread_number=10, send_request_for_file_list(file_list=file_list, thread_number=10,
data_queue_name=MORNING_DATA_CACHE_2) data_queue_name=MORNING_DATA_CACHE)