import datetime import json import random import threading import time import pika from db.mongo_manager import MONGO_STORE_MANAGER from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list from models.ReserveResultPojo import ReserveResultPojo from models.contact_pojo import ContactPojo from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_pojo import JsDataPojo from models.result_pojo import RequestResult from proxy_manager.proxy_manager import ProxyManager from queue_message.CookiesPublisher import CookiesPublisher from utiles import is_time_between from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER from workers.sender import Sender QUEUE_HOST = "appointment.lpaconsulting.fr" REQUEST_DATA_QUEUE = 'REQUEST_DATA' REQUEST_DATA_DE = 'REQUEST_DATA_DE' credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') def is_already_sent(contact: ContactPojo) -> bool: already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() for required_contact in already_sent_contacts: if contact.mail == required_contact.email: return True return False def filter_contacts(_contact_list: list, provided_list=[]) -> list: already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() already_sent_contacts.extend(provided_list) _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() _contact_list_to_book = [] for contact in _contact_list: _to_add = True for booked in already_sent_contacts: if isinstance(booked, ReserveResultPojo): if contact.mail == booked.email: _to_add = False else: if contact.mail == booked.mail: _to_add = False # 如果已经收到链接了,就不要再请求 for link_to_validate in _link_to_validate_list: if contact.mail == link_to_validate.email: print("{}: link already received".format(contact.mail)) _to_add = False if _to_add: _contact_list_to_book.append(contact) return _contact_list_to_book def is_open(): return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) class AppointmentRequestSender(threading.Thread): def __init__(self, sub_contact_list: list, logger, cookiesPublisher: CookiesPublisher, bakeUpCookiesPublisher: CookiesPublisher, queue_name=REQUEST_DATA_QUEUE, stop_at_hour=11, stop_at_mins=30): super().__init__() self.connection = None self.logger = logger self.already_tried_contact_list = [] self.cookiesPublisher = cookiesPublisher self.bakeUpCookiesPublisher = bakeUpCookiesPublisher self.channel = None self.valid_csrf = None self.list_to_retrieve_mails = sub_contact_list self.initial_contact_list = sub_contact_list.copy() self.contact_list = sub_contact_list self.queue_name = queue_name self.proxy_manager = ProxyManager(logger) self.already_read_emails = False self.stop_at_hour = stop_at_hour self.stop_at_mins = stop_at_mins def set_up_connection(self): self.connection = pika.BlockingConnection( pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) self.channel = self.connection.channel() def listen_to_queue(self, callback): self.logger.info("listen to queue {}".format(self.queue_name)) self.channel.basic_qos(prefetch_count=1) self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback) self.channel.start_consuming() def on_message(self, ch, method, properties, body): self.check_and_stop_if_necessary(hour_to_check=self.stop_at_hour, mins_to_check=self.stop_at_mins) _message_count = self.cookiesPublisher.message_count() self.logger.info("message count in queue is {}".format(_message_count)) _received_object = body.decode("UTF-8") self.logger.info(f" [x] Received {_received_object}") # 同一个COOKIES保持用同一个IP # _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() # print("proxy to use is {}".format(_proxy_to_use)) if "glrd" in _received_object: _received_dict = json.loads(_received_object) js_data = JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'], ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'], plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'], plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'], ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], rs_h=_received_dict['rs_h'], rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], emd=_received_dict['emd']) _received_cookies = _received_dict["cookiesStr"] # remove already sent contacts self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list) # remove already booked contacts random.shuffle(self.contact_list) if len(self.contact_list) > 0 and is_open(): captchaResultGetter = CaptchaResultGetter() self.logger.info("contact number is {}".format(len(self.contact_list))) # self.contact_list = filter_contacts(self.contact_list) can_continue = None for con in self.contact_list: _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request() print(_proxy_to_use) sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, proxy_to_use=_proxy_to_use, logger=self.logger) self.logger.info(con.mail) if self.valid_csrf is None: csrf_result = captchaResultGetter.get_csrf( proxy_to_use=_proxy_to_use, js_data=js_data, cookie=_received_cookies) if isinstance(csrf_result, str): self.valid_csrf = csrf_result else: self.logger.info("csrf is {}".format(csrf_result)) if csrf_result == RequestResult.BLOCKED: can_continue = RequestResult.CSRF_BLOCKED break _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, old_valid_cookie=_received_cookies) if _new_cookies is not None: self.logger.info("new cookie is " + _new_cookies) time.sleep(random.randint(1, 3)) # m_s_c = f.scroll m_s_c = random.randint(0, 3) m_c_c = random.randint(3, 5) # click count m_m_c = random.randint(3, 5) # move count m_cm_r = m_c_c / m_m_c m_ms_r = -1 # move scroll ratio if m_s_c == 0: m_ms_r = -1 else: m_ms_r = m_m_c / m_s_c js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'], ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'], plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'], plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'], ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], rs_h=_received_dict['rs_h'], rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, m_cm_r=m_cm_r, m_ms_r=m_ms_r, emd=_received_dict['emd']) time.sleep(random.randint(1, 4)) _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, js_le_type_data=js_le_data, old_valid_cookie=_new_cookies) if _new_le_cookies is not None: # self.logger.info("new le type cookie is " + _new_le_cookies) sender.cookie_str = _new_le_cookies time.sleep(random.randint(1, 3)) self.already_tried_contact_list.append(con) _telephone_model = _received_dict['model'] can_continue = sender.send_request(HERMES_REGISTER, js_data, con, model=_telephone_model, csrf=self.valid_csrf) if can_continue == RequestResult.SUCCESS: # 让服务器读取成功的约会 try: self.logger.info("try to remove success contact from list to retrieve mails") self.list_to_retrieve_mails.remove(con) except Exception as e: self.logger.info( "exception while remove success contact from list to retrieve mails") print(e) else: can_continue = RequestResult.COOKIES_ERROR else: can_continue = RequestResult.COOKIES_ERROR if can_continue == RequestResult.BLOCKED: self.logger.info("BLOCKED, valid_csrf is " + str(self.valid_csrf)) break elif can_continue == RequestResult.PROXY_ERROR: self.logger.info("PROXY_ERROR, will not reset valid_csrf") elif can_continue == RequestResult.COOKIES_ERROR: self.logger.info("COOKIES_ERROR, will not reset valid_csrf") else: self.logger.info("can continue, will reset valid_csrf") self.valid_csrf = None time.sleep(random.randint(1, 2)) self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag)) if can_continue is not None and can_continue == RequestResult.CSRF_BLOCKED: # 如果在发送请求时出现csrf被拦截的情况,那么就需要重新发布cookie以目前的队列中,因为这个cookie可能重新利用 self.logger.info("csrf blocked, will republish cookie") self.cookiesPublisher.publish_body(_received_object) self.logger.info("csrf blocked, will wait 60 seconds") time.sleep(60) ch.basic_ack(delivery_tag=method.delivery_tag) elif can_continue is not None and can_continue == RequestResult.BLOCKED: self.logger.info("这个cookies可以给点链接用") self.bakeUpCookiesPublisher.publish_body(_received_object) ch.basic_ack(delivery_tag=method.delivery_tag) else: ch.basic_ack(delivery_tag=method.delivery_tag) else: self.retrieve_invalidate_urls() self.logger.info("empty list") time.sleep(10) self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) else: self.logger.info("not a valid object") ch.basic_ack(delivery_tag=method.delivery_tag) def run(self): self.logger.info(threading.currentThread().name + " starts") self.set_up_connection() self.listen_to_queue(self.on_message) self.channel.start_consuming() def retrieve_invalidate_urls(self): self.list_to_retrieve_mails = self.initial_contact_list # 如果没有已读邮件,而且需要读邮件的联系人表不为空,就读取未读邮件 if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: self.logger.info("will retrieve validate urls") time.sleep(30) _mail_list = MONGO_STORE_MANAGER.get_destination_emails() _mail_list_filtered = [] for mail in _mail_list: for _contact in self.list_to_retrieve_mails: if _contact.mail == mail.mail: self.logger.info("will get mail from " + mail.mail) _mail_list_filtered.append(mail) self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) find_links_to_validate_from_mail_list(_mail_list_filtered, self.logger) self.already_read_emails = True else: self.logger.info("already read emails, is there any contacts to use") self.logger.info("reset already_tried_contact_list") # 重置已尝试的联系人 self.already_tried_contact_list = [] self.contact_list = filter_contacts(self.initial_contact_list, self.already_tried_contact_list) self.logger.info("contact_list size is " + str(len(self.contact_list))) if len(self.contact_list) > 0: self.logger.info("set already_read_emails to False") self.already_read_emails = False else: self.logger.info("already read emails, no contact to use -> stop") self.channel.stop_consuming() def check_and_stop_if_necessary(self, hour_to_check, mins_to_check): hour = datetime.datetime.now().hour mins = datetime.datetime.now().minute if hour == hour_to_check and mins >= mins_to_check: self.logger.info("will stop") self.channel.stop_consuming() elif hour > hour_to_check: self.logger.info("will stop") self.channel.stop_consuming()