save model information used by link validation
This commit is contained in:
@@ -93,7 +93,7 @@ class LinkValidatorWithProvidedList(threading.Thread):
|
|||||||
print(response.url)
|
print(response.url)
|
||||||
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo,
|
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo,
|
||||||
segement_position=threading.currentThread().name,
|
segement_position=threading.currentThread().name,
|
||||||
ua=_ua)
|
ua=_ua, model=_model)
|
||||||
# set new cookies
|
# set new cookies
|
||||||
_cookies_to_set = response.headers['set-cookie']
|
_cookies_to_set = response.headers['set-cookie']
|
||||||
self.cookie.load(_cookies_to_set)
|
self.cookie.load(_cookies_to_set)
|
||||||
@@ -112,7 +112,8 @@ class LinkValidatorWithProvidedList(threading.Thread):
|
|||||||
self.cookie_str = new_coolies_str
|
self.cookie_str = new_coolies_str
|
||||||
return RequestResult.SUCCESS
|
return RequestResult.SUCCESS
|
||||||
elif INVALID in _content:
|
elif INVALID in _content:
|
||||||
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua)
|
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_invalid=True, ua=_ua,
|
||||||
|
model=_model)
|
||||||
# set new cookies
|
# set new cookies
|
||||||
_cookies_to_set = response.headers['set-cookie']
|
_cookies_to_set = response.headers['set-cookie']
|
||||||
self.cookie.load(_cookies_to_set)
|
self.cookie.load(_cookies_to_set)
|
||||||
@@ -128,7 +129,7 @@ class LinkValidatorWithProvidedList(threading.Thread):
|
|||||||
|
|
||||||
elif DOUBLE_MESSAGE in _content:
|
elif DOUBLE_MESSAGE in _content:
|
||||||
print(response.url)
|
print(response.url)
|
||||||
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua)
|
MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True, ua=_ua, model= _model)
|
||||||
# set new cookies
|
# set new cookies
|
||||||
_cookies_to_set = response.headers['set-cookie']
|
_cookies_to_set = response.headers['set-cookie']
|
||||||
self.cookie.load(_cookies_to_set)
|
self.cookie.load(_cookies_to_set)
|
||||||
@@ -240,7 +241,9 @@ def validate_all_links(_contact_serial_list):
|
|||||||
_queue_name = MORNING_DATA_CACHE_BAK
|
_queue_name = MORNING_DATA_CACHE_BAK
|
||||||
# _queue_name = MORNING_DATA_CACHE
|
# _queue_name = MORNING_DATA_CACHE
|
||||||
_thread_list = []
|
_thread_list = []
|
||||||
if len(_first_25_percent_links) >= 100:
|
if len(_first_25_percent_links) >= 200:
|
||||||
|
_segment_number = 200
|
||||||
|
elif len(_first_25_percent_links) >= 100:
|
||||||
_segment_number = 100
|
_segment_number = 100
|
||||||
elif len(_first_25_percent_links) >= 40:
|
elif len(_first_25_percent_links) >= 40:
|
||||||
_segment_number = 40
|
_segment_number = 40
|
||||||
@@ -249,14 +252,17 @@ def validate_all_links(_contact_serial_list):
|
|||||||
for i in range(0, _segment_number):
|
for i in range(0, _segment_number):
|
||||||
logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links)))
|
logger.info("{}:{} links to validate".format(threading.currentThread().name, len(_first_25_percent_links)))
|
||||||
logger.info("segment is {}".format(i))
|
logger.info("segment is {}".format(i))
|
||||||
_cookiesPublisher = CookiesPublisher(queue_name=_queue_name)
|
try:
|
||||||
_cookiesPublisher.set_up_connection()
|
_cookiesPublisher = CookiesPublisher(queue_name=_queue_name)
|
||||||
_step = int(len(_first_25_percent_links) / _segment_number)
|
_cookiesPublisher.set_up_connection()
|
||||||
_sublist = _first_25_percent_links[i * _step:_step * (i + 1)]
|
_step = int(len(_first_25_percent_links) / _segment_number)
|
||||||
_thread1 = threading.Thread(target=validate_links,
|
_sublist = _first_25_percent_links[i * _step:_step * (i + 1)]
|
||||||
args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list))
|
_thread1 = threading.Thread(target=validate_links,
|
||||||
_thread_list.append(_thread1)
|
args=(_cookiesPublisher, _queue_name, _sublist, _contact_serial_list))
|
||||||
_thread1.start()
|
_thread_list.append(_thread1)
|
||||||
|
_thread1.start()
|
||||||
|
except Exception as error:
|
||||||
|
print(error)
|
||||||
for _thread in _thread_list:
|
for _thread in _thread_list:
|
||||||
_thread.join()
|
_thread.join()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user