diff --git a/db/mongo_manager.py b/db/mongo_manager.py index 11af86c..6dc8e16 100755 --- a/db/mongo_manager.py +++ b/db/mongo_manager.py @@ -168,7 +168,9 @@ class MongoDbManager: return link_list def link_validated_for_result(self, link: str, linkPojo: LinkPojo, state=True, is_duplicated=False, - is_invalid=False, segement_position=1, ua="", model=""): + is_invalid=False, segement_position=1, ua="", model="", timestamp_in_s: list = None): + if timestamp_in_s is None: + timestamp_in_s = [] print("link_validated_for_result() called with url = " + link) if is_duplicated: _id = link.split("/")[-2] @@ -194,6 +196,7 @@ class MongoDbManager: "validated_by_model": model, "serial": linkPojo.serial, "validated_by_ua": ua, + "timestamp_in_s": "-".join(str(x) for x in timestamp_in_s), "validated_by": validated_by}}, upsert=True) # remove the link from db diff --git a/workers/link_validator_with_provided_list.py b/workers/link_validator_with_provided_list.py index 8fb6bf6..6007ef6 100644 --- a/workers/link_validator_with_provided_list.py +++ b/workers/link_validator_with_provided_list.py @@ -14,7 +14,8 @@ from db.mongo_manager import MONGO_STORE_MANAGER from models.LinkPojo import LinkPojo from models.result_pojo import RequestResult from proxy_manager.proxy_manager import ProxyManager -from queue_message.CookiesPublisher import CookiesPublisher, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK +from queue_message.CookiesPublisher import CookiesPublisher, MORNING_DATA_CACHE, MORNING_DATA_CACHE_BAK, \ + MORNING_DATA_CACHE_2 from queue_message.appointmentrequestsender import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials from utils.AppLogging import init_logger from utils.user_agent_helper import generate_headers_from_request_message @@ -31,6 +32,7 @@ def filter_link_pojo_list_with_model(_received_dict, link_to_validate_list): _model = _received_dict["model"] print("link list size before filter = {}".format(len(link_to_validate_list))) _links_filtered_by_model = list(filter(lambda link_pojo: link_pojo.model == _model, link_to_validate_list)) + # 注意:下面这行代码在原文件中覆盖了过滤结果,这里保留原逻辑,如需真正过滤请删除下一行 _links_filtered_by_model = link_to_validate_list print("link list size after filter = {}".format(len(_links_filtered_by_model))) if len(_links_filtered_by_model) > 0: @@ -91,9 +93,31 @@ class LinkValidatorWithProvidedList(threading.Thread): print(response.text) if "Votre demande de rendez-vous est enregistrée" in _content: print(response.url) + + # --- 更新时间戳逻辑开始 --- + _timestamps_to_save = [] + if _received_dict is not None: + try: + current_timestamp = int(time.time()) + # 如果不存在 timestampInS 字段,则初始化 + if "timestampInS" not in _received_dict: + _received_dict["timestampInS"] = [] + + # 确保是列表并添加当前时间戳 + if isinstance(_received_dict["timestampInS"], list): + _received_dict["timestampInS"].append(current_timestamp) + _timestamps_to_save = _received_dict["timestampInS"] + print("Updated timestampInS with: {}".format(current_timestamp)) + except Exception as e: + print(f"Error updating timestamp: {e}") + # --- 更新时间戳逻辑结束 --- + + # 调用 DB Manager,传入 timestampInS MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, segement_position=threading.currentThread().name, - ua=_ua, model=_model) + ua=_ua, model=_model, + timestamp_in_s=_timestamps_to_save) + # set new cookies _cookies_to_set = response.headers['set-cookie'] self.cookie.load(_cookies_to_set) @@ -240,6 +264,7 @@ def validate_all_links(_contact_serial_list): # _queue_name = "REQUEST_DATA" _queue_name = MORNING_DATA_CACHE_BAK # _queue_name = MORNING_DATA_CACHE + # _queue_name = MORNING_DATA_CACHE_2 _thread_list = [] if len(_first_25_percent_links) >= 200: _segment_number = 200