Files
appointment_request/queue_message/appointmentrequestsender.py
T

277 lines
15 KiB
Python

import datetime
import json
import random
import threading
import time
import pika
from db.mongo_manager import MONGO_STORE_MANAGER
from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list
from models.ReserveResultPojo import ReserveResultPojo
from models.contact_pojo import ContactPojo
from models.jsdata_le_pojo import JsDataLeTypePojo
from models.jsdata_pojo import JsDataPojo
from models.result_pojo import RequestResult
from proxy_manager.proxy_manager import ProxyManager
from queue_message.CookiesPublisher import CookiesPublisher
from utiles import is_time_between
from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER
from workers.sender import Sender
QUEUE_HOST = "appointment.lpaconsulting.fr"
REQUEST_DATA_QUEUE = 'REQUEST_DATA'
REQUEST_DATA_DE = 'REQUEST_DATA_DE'
credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu')
def is_already_sent(contact: ContactPojo) -> bool:
already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
for required_contact in already_sent_contacts:
if contact.mail == required_contact.email:
return True
return False
def filter_contacts(_contact_list: list, provided_list=[]) -> list:
already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
already_sent_contacts.extend(provided_list)
_link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate()
_contact_list_to_book = []
for contact in _contact_list:
_to_add = True
for booked in already_sent_contacts:
if isinstance(booked, ReserveResultPojo):
if contact.mail == booked.email:
_to_add = False
else:
if contact.mail == booked.mail:
_to_add = False
# 如果已经收到链接了,就不要再请求
for link_to_validate in _link_to_validate_list:
if contact.mail == link_to_validate.email:
print("{}: link already received".format(contact.mail))
_to_add = False
if _to_add:
_contact_list_to_book.append(contact)
return _contact_list_to_book
def is_open():
return is_time_between(datetime.time(10, 30), datetime.time(19, 00))
class AppointmentRequestSender(threading.Thread):
def __init__(self, sub_contact_list: list, logger, cookiesPublisher: CookiesPublisher,
bakeUpCookiesPublisher: CookiesPublisher,
queue_name=REQUEST_DATA_QUEUE, stop_at_hour=11, stop_at_mins=30):
super().__init__()
self.connection = None
self.logger = logger
self.already_tried_contact_list = []
self.cookiesPublisher = cookiesPublisher
self.bakeUpCookiesPublisher = bakeUpCookiesPublisher
self.channel = None
self.valid_csrf = None
self.list_to_retrieve_mails = sub_contact_list
self.initial_contact_list = sub_contact_list.copy()
self.contact_list = sub_contact_list
self.queue_name = queue_name
self.proxy_manager = ProxyManager(logger)
self.already_read_emails = False
self.stop_at_hour = stop_at_hour
self.stop_at_mins = stop_at_mins
def set_up_connection(self):
self.connection = pika.BlockingConnection(
pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials))
self.channel = self.connection.channel()
def listen_to_queue(self, callback):
self.logger.info("listen to queue {}".format(self.queue_name))
self.channel.basic_qos(prefetch_count=1)
self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback)
self.channel.start_consuming()
def on_message(self, ch, method, properties, body):
self.check_and_stop_if_necessary(hour_to_check=self.stop_at_hour, mins_to_check=self.stop_at_mins)
_message_count = self.cookiesPublisher.message_count()
self.logger.info("message count in queue is {}".format(_message_count))
_received_object = body.decode("UTF-8")
self.logger.info(f" [x] Received {_received_object}")
# 同一个COOKIES保持用同一个IP
_proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request()
print("proxy to use is {}".format(_proxy_to_use))
if "glrd" in _received_object:
_received_dict = json.loads(_received_object)
js_data = JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'],
ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'],
ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'],
plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'],
plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'],
ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'],
rs_h=_received_dict['rs_h'],
rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], emd=_received_dict['emd'])
_received_cookies = _received_dict["cookiesStr"]
# remove already sent contacts
self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list)
# remove already booked contacts
random.shuffle(self.contact_list)
if len(self.contact_list) > 0 and is_open():
captchaResultGetter = CaptchaResultGetter()
self.logger.info("contact number is {}".format(len(self.contact_list)))
# self.contact_list = filter_contacts(self.contact_list)
can_continue = None
for con in self.contact_list:
# _proxy_to_use = self.proxy_manager.get_proxy_for_appointment_request()
# print(_proxy_to_use)
sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher,
received_dict=_received_dict,
proxy_to_use=_proxy_to_use, logger=self.logger)
self.logger.info(con.mail)
if self.valid_csrf is None:
csrf_result = captchaResultGetter.get_csrf(
proxy_to_use=_proxy_to_use, js_data=js_data,
cookie=_received_cookies)
if isinstance(csrf_result, str):
self.valid_csrf = csrf_result
else:
self.logger.info("csrf is {}".format(csrf_result))
if csrf_result == RequestResult.BLOCKED:
can_continue = RequestResult.CSRF_BLOCKED
break
_new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data,
old_valid_cookie=_received_cookies)
if _new_cookies is not None:
self.logger.info("new cookie is " + _new_cookies)
time.sleep(random.randint(1, 3))
# m_s_c = f.scroll
m_s_c = random.randint(0, 3)
m_c_c = random.randint(3, 5) # click count
m_m_c = random.randint(3, 5) # move count
m_cm_r = m_c_c / m_m_c
m_ms_r = -1 # move scroll ratio
if m_s_c == 0:
m_ms_r = -1
else:
m_ms_r = m_m_c / m_s_c
js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'],
hc=_received_dict['hc'],
ua=_received_dict['ua'], br_oh=_received_dict['br_oh'],
br_ow=_received_dict['br_ow'],
ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'],
pr=_received_dict['pr'],
plg=_received_dict['plg'], br_h=_received_dict['br_h'],
br_w=_received_dict['br_w'],
plu=_received_dict['plu'], vnd=_received_dict['vnd'],
dvm=_received_dict['dvm'],
ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'],
rs_h=_received_dict['rs_h'],
rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'],
m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c,
m_cm_r=m_cm_r, m_ms_r=m_ms_r, emd=_received_dict['emd'])
time.sleep(random.randint(1, 4))
_new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use,
js_le_type_data=js_le_data,
old_valid_cookie=_new_cookies)
if _new_le_cookies is not None:
# self.logger.info("new le type cookie is " + _new_le_cookies)
sender.cookie_str = _new_le_cookies
time.sleep(random.randint(1, 3))
self.already_tried_contact_list.append(con)
can_continue = sender.send_request(HERMES_REGISTER, js_data, con, csrf=self.valid_csrf)
if can_continue == RequestResult.SUCCESS:
# 让服务器读取成功的约会
try:
self.logger.info("try to remove success contact from list to retrieve mails")
self.list_to_retrieve_mails.remove(con)
except Exception as e:
self.logger.info(
"exception while remove success contact from list to retrieve mails")
print(e)
else:
can_continue = RequestResult.COOKIES_ERROR
else:
can_continue = RequestResult.COOKIES_ERROR
if can_continue == RequestResult.BLOCKED:
self.logger.info("BLOCKED, valid_csrf is " + str(self.valid_csrf))
break
elif can_continue == RequestResult.PROXY_ERROR:
self.logger.info("PROXY_ERROR, will not reset valid_csrf")
elif can_continue == RequestResult.COOKIES_ERROR:
self.logger.info("COOKIES_ERROR, will not reset valid_csrf")
else:
self.logger.info("can continue, will reset valid_csrf")
self.valid_csrf = None
time.sleep(random.randint(1, 2))
self.logger.info("will ack method.delivery_tag: " + str(method.delivery_tag))
if can_continue is not None and can_continue == RequestResult.CSRF_BLOCKED:
# 如果在发送请求时出现csrf被拦截的情况,那么就需要重新发布cookie以目前的队列中,因为这个cookie可能重新利用
self.logger.info("csrf blocked, will republish cookie")
self.cookiesPublisher.publish_body(_received_object)
self.logger.info("csrf blocked, will wait 60 seconds")
time.sleep(60)
ch.basic_ack(delivery_tag=method.delivery_tag)
elif can_continue is not None and can_continue == RequestResult.BLOCKED:
self.logger.info("这个cookies可以给点链接用")
self.bakeUpCookiesPublisher.publish_body(_received_object)
ch.basic_ack(delivery_tag=method.delivery_tag)
else:
ch.basic_ack(delivery_tag=method.delivery_tag)
else:
self.retrieve_invalidate_urls()
self.logger.info("empty list")
time.sleep(10)
self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag))
ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)
else:
self.logger.info("not a valid object")
ch.basic_ack(delivery_tag=method.delivery_tag)
def run(self):
self.logger.info(threading.currentThread().name + " starts")
self.set_up_connection()
self.listen_to_queue(self.on_message)
self.channel.start_consuming()
def retrieve_invalidate_urls(self):
# 如果没有已读邮件,而且需要读邮件的联系人表不为空,就读取未读邮件
if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0:
self.logger.info("will retrieve validate urls")
time.sleep(30)
_mail_list = MONGO_STORE_MANAGER.get_destination_emails()
_mail_list_filtered = []
for mail in _mail_list:
for _contact in self.list_to_retrieve_mails:
if _contact.mail == mail.mail:
self.logger.info("will get mail from " + mail.mail)
_mail_list_filtered.append(mail)
self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered)))
find_links_to_validate_from_mail_list(_mail_list_filtered, self.logger)
self.already_read_emails = True
else:
self.logger.info("already read emails, is there any contacts to use")
self.logger.info("reset already_tried_contact_list")
# 重置已尝试的联系人
self.already_tried_contact_list = []
self.contact_list = filter_contacts(self.initial_contact_list, self.already_tried_contact_list)
self.logger.info("contact_list size is " + str(len(self.contact_list)))
if len(self.contact_list) > 0:
self.logger.info("set already_read_emails to False")
self.already_read_emails = False
else:
self.logger.info("already read emails, no contact to use -> stop")
self.channel.stop_consuming()
def check_and_stop_if_necessary(self, hour_to_check, mins_to_check):
hour = datetime.datetime.now().hour
mins = datetime.datetime.now().minute
if hour == hour_to_check and mins >= mins_to_check:
self.logger.info("will stop")
self.channel.stop_consuming()
elif hour > hour_to_check:
self.logger.info("will stop")
self.channel.stop_consuming()