From 7eab86c910438c1a3f4e2ddf970cc44937ced058 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Thu, 18 Sep 2025 14:14:21 +0200 Subject: [PATCH] try to scheduler multi tasks --- workers/link_validator_with_provided_list.py | 32 +++++++++----------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index e3574c3..bb9af8a 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -7,8 +7,9 @@ from datetime import datetime from http.cookies import SimpleCookie import pika -from curl_cffi import requests +# from curl_cffi import requests +import requests from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo from models.result_pojo import RequestResult @@ -28,7 +29,8 @@ logger = logging.getLogger() def filter_link_pojo_list_with_serial(_received_dict, link_to_validate_list): _serial = _received_dict["serial"] _model = _received_dict["model"] - _bit_browser_list = list(filter(lambda link_pojo: link_pojo.serial == "requests", link_to_validate_list)) + # _bit_browser_list = list(filter(lambda link_pojo: link_pojo.serial == "requests", link_to_validate_list)) + _bit_browser_list = link_to_validate_list if len(_bit_browser_list) > 0: return _bit_browser_list else: @@ -77,7 +79,8 @@ class LinkValidatorWithProvidedList(threading.Thread): print("will send request with ua {}".format(_ua)) print("will send request with cookie {}".format(self.cookie_str)) response = requests.get(url=linkPojo.url, headers=headers, verify=False, proxies=self.proxy_to_use, - timeout=60, impersonate="chrome99_android") + timeout=60) + # timeout=60, impersonate="chrome99_android") print(response.status_code) if response.status_code == 200: _content = response.text @@ -181,7 +184,7 @@ class LinkValidatorWithProvidedList(threading.Thread): else: print("will ack") print("will wait for 40s") - time.sleep(20) + time.sleep(40) ch.basic_ack(delivery_tag=method.delivery_tag) else: print("empty list, will republish message") @@ -221,8 +224,8 @@ def validate_all_links(_contact_serial_list): for _link in all_link_list: # print("serial is "+_link.serial) print("email is " + _link.email) - if _link.serial == "requests": - link_to_validated.append(_link) + # if _link.serial == "requests": + link_to_validated.append(_link) # get the first 50 links if len(link_to_validated) == 0: return @@ -230,19 +233,14 @@ def validate_all_links(_contact_serial_list): # default_segment_number = 20 _first_25_percent_links = link_to_validated[0:(int(len(all_link_list) / divided))] _first_25_percent_links = all_link_list - # _queue_name = MORNING_DATA_CACHE + # _queue_name = "REQUEST_DATA" _queue_name = MORNING_DATA_CACHE_BAK - # if len(all_link_list) > divided * default_segment_number: - # _segment_number = default_segment_number - # else: - # _first_25_percent_links = all_link_list - # if len(_first_25_percent_links) > divided: - # _segment_number = int(len(_first_25_percent_links) / divided) - # else: - # _segment_number = 1 + # _queue_name = MORNING_DATA_CACHE _thread_list = [] - if len(_first_25_percent_links) >= 20: - _segment_number = 20 + if len(_first_25_percent_links) >= 100: + _segment_number = 100 + elif len(_first_25_percent_links) >= 40: + _segment_number = 40 else: _segment_number = len(_first_25_percent_links) for i in range(0, _segment_number):