From e204c4a6da3752dfdbce4aadea6c98d19a3e2969 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Thu, 20 Nov 2025 23:33:04 +0100 Subject: [PATCH] save model information used by link validation --- workers/link_validator_with_provided_list.py | 30 ++++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index 56e8a7e..558cb0d 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -93,7 +93,7 @@ class LinkValidatorWithProvidedList(threading.Thread): print(response.url) MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, segement_position=threading.currentThread().name, - ua=_ua) + ua=_ua, model=_model) # set new cookies _cookies_to_set = response.headers['set-cookie'] self.cookie.load(_cookies_to_set) @@ -112,7 +112,8 @@ class LinkValidatorWithProvidedList(threading.Thread): self.cookie_str = new_coolies_str return RequestResult.SUCCESS elif INVALID in _content: - MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua) + MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua, + model=_model) # set new cookies _cookies_to_set = response.headers['set-cookie'] self.cookie.load(_cookies_to_set) @@ -128,7 +129,7 @@ class LinkValidatorWithProvidedList(threading.Thread): elif DOUBLE_MESSAGE in _content: print(response.url) - MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua) + MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua, model= _model) # set new cookies _cookies_to_set = response.headers['set-cookie'] self.cookie.load(_cookies_to_set) @@ -240,7 +241,9 @@ def validate_all_links(_contact_serial_list): _queue_name = MORNING_DATA_CACHE_BAK # _queue_name = MORNING_DATA_CACHE _thread_list = [] - if len(_first_25_percent_links) >= 100: + if len(_first_25_percent_links) >= 200: + _segment_number = 200 + elif len(_first_25_percent_links) >= 100: _segment_number = 100 elif len(_first_25_percent_links) >= 40: _segment_number = 40 @@ -249,14 +252,17 @@ def validate_all_links(_contact_serial_list): for i in range(0, _segment_number): logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links))) logger.info("segment is {}".format(i)) - _cookiesPublisher = CookiesPublisher(queue_name=_queue_name) - _cookiesPublisher.set_up_connection() - _step = int(len(_first_25_percent_links) / _segment_number) - _sublist = _first_25_percent_links[i * _step:_step * (i + 1)] - _thread1 = threading.Thread(target=validate_links, - args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list)) - _thread_list.append(_thread1) - _thread1.start() + try: + _cookiesPublisher = CookiesPublisher(queue_name=_queue_name) + _cookiesPublisher.set_up_connection() + _step = int(len(_first_25_percent_links) / _segment_number) + _sublist = _first_25_percent_links[i * _step:_step * (i + 1)] + _thread1 = threading.Thread(target=validate_links, + args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list)) + _thread_list.append(_thread1) + _thread1.start() + except Exception as error: + print(error) for _thread in _thread_list: _thread.join()