save model information used by link validation

This commit is contained in:
2025-11-20 23:33:04 +01:00
parent 4a33094140
commit e204c4a6da
+18 -12
View File
@@ -93,7 +93,7 @@ class LinkValidatorWithProvidedList(threading.Thread):
print(response.url) print(response.url)
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo,
segement_position=threading.currentThread().name, segement_position=threading.currentThread().name,
ua=_ua) ua=_ua, model=_model)
# set new cookies # set new cookies
_cookies_to_set = response.headers['set-cookie'] _cookies_to_set = response.headers['set-cookie']
self.cookie.load(_cookies_to_set) self.cookie.load(_cookies_to_set)
@@ -112,7 +112,8 @@ class LinkValidatorWithProvidedList(threading.Thread):
self.cookie_str = new_coolies_str self.cookie_str = new_coolies_str
return RequestResult.SUCCESS return RequestResult.SUCCESS
elif INVALID in _content: elif INVALID in _content:
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua) MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua,
model=_model)
# set new cookies # set new cookies
_cookies_to_set = response.headers['set-cookie'] _cookies_to_set = response.headers['set-cookie']
self.cookie.load(_cookies_to_set) self.cookie.load(_cookies_to_set)
@@ -128,7 +129,7 @@ class LinkValidatorWithProvidedList(threading.Thread):
elif DOUBLE_MESSAGE in _content: elif DOUBLE_MESSAGE in _content:
print(response.url) print(response.url)
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua) MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua, model= _model)
# set new cookies # set new cookies
_cookies_to_set = response.headers['set-cookie'] _cookies_to_set = response.headers['set-cookie']
self.cookie.load(_cookies_to_set) self.cookie.load(_cookies_to_set)
@@ -240,7 +241,9 @@ def validate_all_links(_contact_serial_list):
_queue_name = MORNING_DATA_CACHE_BAK _queue_name = MORNING_DATA_CACHE_BAK
# _queue_name = MORNING_DATA_CACHE # _queue_name = MORNING_DATA_CACHE
_thread_list = [] _thread_list = []
if len(_first_25_percent_links) >= 100: if len(_first_25_percent_links) >= 200:
_segment_number = 200
elif len(_first_25_percent_links) >= 100:
_segment_number = 100 _segment_number = 100
elif len(_first_25_percent_links) >= 40: elif len(_first_25_percent_links) >= 40:
_segment_number = 40 _segment_number = 40
@@ -249,14 +252,17 @@ def validate_all_links(_contact_serial_list):
for i in range(0, _segment_number): for i in range(0, _segment_number):
logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links))) logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links)))
logger.info("segment is {}".format(i)) logger.info("segment is {}".format(i))
_cookiesPublisher = CookiesPublisher(queue_name=_queue_name) try:
_cookiesPublisher.set_up_connection() _cookiesPublisher = CookiesPublisher(queue_name=_queue_name)
_step = int(len(_first_25_percent_links) / _segment_number) _cookiesPublisher.set_up_connection()
_sublist = _first_25_percent_links[i * _step:_step * (i + 1)] _step = int(len(_first_25_percent_links) / _segment_number)
_thread1 = threading.Thread(target=validate_links, _sublist = _first_25_percent_links[i * _step:_step * (i + 1)]
args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list)) _thread1 = threading.Thread(target=validate_links,
_thread_list.append(_thread1) args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list))
_thread1.start() _thread_list.append(_thread1)
_thread1.start()
except Exception as error:
print(error)
for _thread in _thread_list: for _thread in _thread_list:
_thread.join() _thread.join()