From cb0a4df5a1337baf55b21e6cbeac83fb58ae7f8d Mon Sep 17 00:00:00 2001 From: PAN Lei Date: Sat, 11 May 2024 11:24:42 +0200 Subject: [PATCH] same cookie use same ip address --- proxy_manager/proxy_manager.py | 14 +++++++----- queue_message/appointmentrequestsender.py | 7 ++++-- request_sender.py | 14 ++++++------ request_sender_test.py | 2 +- scheduler_test.py | 2 +- workers/link_validator_with_provided_list.py | 23 +++++++++++++------- 6 files changed, 38 insertions(+), 24 deletions(-) diff --git a/proxy_manager/proxy_manager.py b/proxy_manager/proxy_manager.py index 2c23068..e46cbec 100644 --- a/proxy_manager/proxy_manager.py +++ b/proxy_manager/proxy_manager.py @@ -29,7 +29,13 @@ FR_PROXY_DATA_IMPULSE_STICKY = { 'https': 'http://1d568220dbefeff21ad4__cr.fr:df99489a25fa72d9@gw.dataimpulse.com:{}' } -MOBILE_PROXY_LIST = [FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, +FR_DATA_IMPULSE_RES = { + 'http': 'http://ac2b8183c3b02714cbd1__cr.fr:336e7f8f4b17e5bd@gw.dataimpulse.com:823', + 'https': 'http://ac2b8183c3b02714cbd1__cr.fr:336e7f8f4b17e5bd@gw.dataimpulse.com:823' +} +# 八分之一用data_impulse +MOBILE_PROXY_LIST = [FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, + FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, FR_PROXY_MOB_OXY_STICKY, FR_PROXY_DATA_IMPULSE_STICKY] @@ -40,11 +46,9 @@ class ProxyManager: def get_link_validate_proxy(self, links_to_validate: list) -> list: if len(links_to_validate) > 15: - return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2] - # return [FR_PROXY_RES_OXY] + return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2, FR_DATA_IMPULSE_RES] else: - return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2] - # return [FR_PROXY_RES_OXY] + return [FR_PROXY_RES_OXY, FR_PROXY_ASOCK_RES_2, FR_DATA_IMPULSE_RES] def get_proxy_for_appointment_request(self) -> dict: _chosen_proxy = random.choice(MOBILE_PROXY_LIST) diff --git a/queue_message/appointmentrequestsender.py b/queue_message/appointmentrequestsender.py index 5ddb2d9..1d962d6 100644 --- a/queue_message/appointmentrequestsender.py +++ b/queue_message/appointmentrequestsender.py @@ -97,6 +97,9 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("message count in queue is {}".format(_message_count)) _received_object = body.decode("UTF-8") self.logger.info(f" [x] Received {_received_object}") + # 同一个COOKIES保持用同一个IP + _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() + print("proxy to use is {}".format(_proxy_to_use)) if "glrd" in _received_object: _received_dict = json.loads(_received_object) js_data = JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], @@ -120,8 +123,8 @@ class AppointmentRequestSender(threading.Thread): self.logger.info("contact number is {}".format(len(self.contact_list))) # self.contact_list = filter_contacts(self.contact_list) for con in self.contact_list: - _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() - print(_proxy_to_use) + # _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() + # print(_proxy_to_use) sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, proxy_to_use=_proxy_to_use, logger=self.logger) self.logger.info(con.mail) diff --git a/request_sender.py b/request_sender.py index 14ea7ab..25077ae 100644 --- a/request_sender.py +++ b/request_sender.py @@ -6,7 +6,8 @@ from threading import Thread from db.mongo_manager import MONGO_STORE_MANAGER from excel_reader import read_contacts from models.contact_pojo import ContactPojo -from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE +from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE, MORNING_DATA_CACHE_2, \ + MORNING_DATA_CACHE from queue_message.appointmentrequestsender import AppointmentRequestSender from utiles import is_time_between from utils.AppLogging import init_logger @@ -62,8 +63,7 @@ def send_appointment_request(message_queue_name, _contact_list): logger.info(_contact) _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) _cookiesPublisher.set_up_connection() - receiver = AppointmentRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, - queue_name=message_queue_name, just_send=True, + receiver = AppointmentRequestSender(sub_contact_list=_contact_list, queue_name=message_queue_name, just_send=True, cookiesPublisher=_cookiesPublisher, logger=logger) print("count is " + str(count)) receiver.run() @@ -71,17 +71,17 @@ def send_appointment_request(message_queue_name, _contact_list): def start_send_requests(): print("start send requests") - contacts_file_path = '~/Desktop/contact_list_2024-04-19.xlsx' - _contact_list = read_contacts(contacts_file_path) + contacts_file_path = '~/Desktop/11_05_to_test.xlsx' + _contact_list = read_contacts(contacts_file_path)[:-1] _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 1 + _segment_number = 5 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): logger.info("segment is {}".format(i)) _step = int(len(_contact_list_to_book) / _segment_number) _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] - _thread1 = Thread(target=send_appointment_request, args=(SHARED_OBJECT, _sublist)) + _thread1 = Thread(target=send_appointment_request, args=(MORNING_DATA_CACHE, _sublist)) last_thread = _thread1 _thread1.start() last_thread.join() diff --git a/request_sender_test.py b/request_sender_test.py index 95ee5df..5ca058e 100644 --- a/request_sender_test.py +++ b/request_sender_test.py @@ -86,4 +86,4 @@ def start_send_requests(thread_number, file_path): if __name__ == '__main__': - start_send_requests(thread_number=5, file_path='~/Desktop/contact_list_2024-04-24.xlsx') + start_send_requests(thread_number=28, file_path='~/Desktop/contact_list_2024-05-10.xlsx') diff --git a/scheduler_test.py b/scheduler_test.py index 6e0ae38..f347f07 100644 --- a/scheduler_test.py +++ b/scheduler_test.py @@ -5,7 +5,7 @@ from request_sender_test import start_send_requests def start_book_appointment(): - start_send_requests(thread_number=28, file_path='~/Desktop/contact_list_2024-05-09.xlsx') + start_send_requests(thread_number=28, file_path='~/Desktop/contact_list_2024-05-10.xlsx') def start_check_results_job(sched): diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index ad8f231..f338eca 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -68,8 +68,6 @@ class LinkValidatorWithProvidedList(threading.Thread): 'Sec-Fetch-Site': 'same-origin', 'Sec-Fetch-Dest': 'document', 'Accept-Language': 'fr-FR,fr;q=0.6'} - _proxy_to_use = random.choice(self.proxy_manager.get_link_validate_proxy(self.link_to_validate_list)) - print(_proxy_to_use) print("received cookie is " + str(self.cookie_str)) print("send request for link: " + linkPojo.url) try: @@ -161,7 +159,9 @@ class LinkValidatorWithProvidedList(threading.Thread): can_continue = None for link_to_validate in self.link_to_validate_list: print(link_to_validate) - self.proxy_to_use = random.choice(PROXY_LIST_FR) + self.proxy_to_use = random.choice( + self.proxy_manager.get_link_validate_proxy(self.link_to_validate_list)) + print("proxy to use is {}".format(self.proxy_to_use)) can_continue = self.send_request(link_to_validate, _received_dict) # remove the tested link from link list self.link_to_validate_list.remove(link_to_validate) @@ -205,17 +205,24 @@ def validate_links(cookiesPublisher, queue_name: str, link_list: list): def validate_all_links(): all_link_list = MONGO_STORE_MANAGER.get_links_to_validate() # get the first 50 links - _first_50_links = all_link_list[0:(int(len(all_link_list) / 4))] + if len(all_link_list) == 0: + return + divided = 4 + _first_25_percent_links = all_link_list[0:(int(len(all_link_list) / divided))] _queue_name = MORNING_DATA_CACHE_BAK - _segment_number = 20 + if len(_first_25_percent_links) > 40: + _segment_number = 20 + else: + _first_25_percent_links = all_link_list + _segment_number = int(len(_first_25_percent_links) / divided) last_thread = None for i in range(0, _segment_number): - logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_50_links))) + logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links))) logger.info("segment is {}".format(i)) _cookiesPublisher = CookiesPublisher(queue_name=_queue_name) _cookiesPublisher.set_up_connection() - _step = int(len(_first_50_links) / _segment_number) - _sublist = _first_50_links[i * _step:_step * (i + 1)] + _step = int(len(_first_25_percent_links) / _segment_number) + _sublist = _first_25_percent_links[i * _step:_step * (i + 1)] _thread1 = threading.Thread(target=validate_links, args=(_cookiesPublisher, MORNING_DATA_CACHE_BAK, _sublist)) last_thread = _thread1 _thread1.start()