diff --git a/models/result_pojo.py b/models/result_pojo.py index a6ff553..8d62dfe 100644 --- a/models/result_pojo.py +++ b/models/result_pojo.py @@ -7,3 +7,4 @@ class RequestResult(Enum): SUCCESS = "SUCCESS" COOKIES_ERROR = "COOKIES_ERROR" UNKNOWN = "UNKNOWN" + CTRF_ERROR = "CTRF_ERROR" diff --git a/parallel_request_sender.py b/parallel_request_sender.py new file mode 100644 index 0000000..cf6f45c --- /dev/null +++ b/parallel_request_sender.py @@ -0,0 +1,87 @@ +import datetime +import logging +import sys +from threading import Thread + +from db.mongo_manager import MONGO_STORE_MANAGER +from excel_reader import read_contacts +from models.contact_pojo import ContactPojo +from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE +from queue_message.appointmentrequestsender import AppointmentRequestSender +from queue_message.parallel_requestsender import ParallelRequestSender +from utiles import is_time_between +from utils.AppLogging import init_logger +from workers.proxies_constants import MOBILE_PROXY_LIST_FR + +IPFIY = 'http://api.ipify.org' +NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app" + + +def is_already_sent(contact: ContactPojo) -> bool: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + for required_contact in already_sent_contacts: + if contact.mail == required_contact.email: + return True + return False + + +def filter_contacts(_contact_list: list) -> list: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + _link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate() + _contact_list_to_book = [] + for contact in _contact_list: + _to_add = True + for booked in already_sent_contacts: + if contact.mail == booked.email: + _to_add = False + # 如果已经收到链接了,就不要再请求 + for link_to_validate in _link_to_validate_list: + if contact.mail == link_to_validate.email: + logger.info("{}: link already received".format(contact.mail)) + _to_add = False + if _to_add: + _contact_list_to_book.append(contact) + + return _contact_list_to_book + + +def is_open(): + return is_time_between(datetime.time(10, 30), datetime.time(19, 00)) + + +count = 0 +init_logger() +logger = logging.getLogger() + +logger.addHandler(logging.StreamHandler(stream=sys.stdout)) + + +def send_appointment_request(message_queue_name, _contact_list): + global count + count = count + 1 + for _contact in _contact_list: + logger.info(_contact) + _cookiesPublisher = CookiesPublisher(queue_name=message_queue_name) + _cookiesPublisher.set_up_connection() + receiver = ParallelRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR, + queue_name=message_queue_name, just_send=True, + cookiesPublisher=_cookiesPublisher, logger=logger) + print("count is " + str(count)) + receiver.run() + + +if __name__ == '__main__': + contacts_file_path = '~/Desktop/31_03_to_test.xlsx' + _contact_list = read_contacts(contacts_file_path)[0:20] + _contact_list_to_book = filter_contacts(_contact_list) + _segment_number = 1 + logger.info("{} contacts to book".format(len(_contact_list_to_book))) + last_thread = None + for i in range(0, _segment_number): + logger.info("segment is {}".format(i)) + _step = int(len(_contact_list_to_book) / _segment_number) + _sublist = _contact_list_to_book[i * _step:_step * (i + 1)] + _thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist)) + last_thread = _thread1 + _thread1.start() + last_thread.join() diff --git a/queue_message/parallel_requestsender.py b/queue_message/parallel_requestsender.py new file mode 100644 index 0000000..1c5ad23 --- /dev/null +++ b/queue_message/parallel_requestsender.py @@ -0,0 +1,221 @@ +import datetime +import json +import random +import threading +import time +from concurrent.futures.thread import ThreadPoolExecutor + +import pika + +from db.mongo_manager import MONGO_STORE_MANAGER +from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list +from models.ReserveResultPojo import ReserveResultPojo +from models.contact_pojo import ContactPojo +from models.jsdata_le_pojo import JsDataLeTypePojo +from models.jsdata_pojo import JsDataPojo +from models.result_pojo import RequestResult +from queue_message.CookiesPublisher import CookiesPublisher +from queue_message.appointmentrequestsender import filter_contacts, is_open +from utiles import is_time_between +from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER +from workers.sender import Sender + +QUEUE_HOST = "appointment.lpaconsulting.fr" +REQUEST_DATA_QUEUE = 'REQUEST_DATA' +credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu') + + +def is_already_sent(contact: ContactPojo) -> bool: + already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day() + for required_contact in already_sent_contacts: + if contact.mail == required_contact.email: + return True + return False + + +class ParallelRequestSender(threading.Thread): + def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher, + just_send=False, + queue_name=REQUEST_DATA_QUEUE): + super().__init__() + self.connection = None + self.just_send = just_send + self.logger = logger + self.already_tried_contact_list = [] + self.cookiesPublisher = cookiesPublisher + self.channel = None + self.valid_csrf = None + self.list_to_retrieve_mails = sub_contact_list + self.contact_list = sub_contact_list + self.queue_name = queue_name + self.proxy_to_use_list = proxy_to_use_list + self.already_read_emails = False + + def set_up_connection(self): + self.connection = pika.BlockingConnection( + pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials)) + self.channel = self.connection.channel() + + def listen_to_queue(self, callback): + self.logger.info("listen to queue {}".format(self.queue_name)) + self.channel.basic_qos(prefetch_count=1) + self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback) + self.channel.start_consuming() + + def send_request(self, _received_cookies, _received_dict, js_data: JsDataPojo, logger, + _contact) -> RequestResult: + _proxy_to_use = self.generate_proxy() + logger.info("send_request for contact: {}, cookies: {}".format(_contact.mail, _received_cookies)) + logger.info("proxy to use is {}".format(_proxy_to_use)) + sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict, + proxy_to_use=_proxy_to_use, logger=logger) + # remove already sent contacts + if is_open(): + captchaResultGetter = CaptchaResultGetter() + _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, + old_valid_cookie=_received_cookies) + # self.contact_list = filter_contacts(self.contact_list) + logger.info(_contact.mail) + valid_csrf = captchaResultGetter.get_csrf( + proxy_to_use=_proxy_to_use, js_data=js_data, + cookie=_new_cookies) + if isinstance(valid_csrf, str): + if _new_cookies is not None: + logger.info("new cookie is " + _new_cookies) + # m_s_c = f.scroll + m_s_c = random.randint(0, 3) + m_c_c = random.randint(3, 5) # click count + m_m_c = random.randint(3, 5) # move count + m_cm_r = m_c_c / m_m_c + m_ms_r = random.randint(-1, 1) + + js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], + hc=_received_dict['hc'], + ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], + br_ow=_received_dict['br_ow'], + ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], + pr=_received_dict['pr'], + plg=_received_dict['plg'], br_h=_received_dict['br_h'], + br_w=_received_dict['br_w'], + plu=_received_dict['plu'], vnd=_received_dict['vnd'], + dvm=_received_dict['dvm'], + ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], + rs_h=_received_dict['rs_h'], + rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'], + m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c, + m_cm_r=m_cm_r, m_ms_r=m_ms_r) + time.sleep(random.randint(1, 4)) + _new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use, + js_le_type_data=js_le_data, + old_valid_cookie=_new_cookies) + if _new_le_cookies is not None: + # self.logger.info("new le type cookie is " + _new_le_cookies) + sender.cookie_str = _new_le_cookies + time.sleep(random.randint(1, 3)) + self.already_tried_contact_list.append(_contact) + can_continue = sender.send_request(HERMES_REGISTER, js_data, _contact, csrf=valid_csrf) + if can_continue == RequestResult.SUCCESS: + # 让服务器读取成功的约会 + try: + self.logger.info("try to remove success contact from list to retrieve mails") + self.list_to_retrieve_mails.remove(_contact) + except Exception as e: + self.logger.info( + "exception while remove success contact from list to retrieve mails") + print(e) + else: + can_continue = RequestResult.COOKIES_ERROR + else: + can_continue = RequestResult.COOKIES_ERROR + if can_continue == RequestResult.BLOCKED: + self.logger.info("cannot continue, we are blocked " + str(self.valid_csrf)) + elif can_continue == RequestResult.PROXY_ERROR: + self.logger.info("PROXY_ERROR, will not reset valid_csrf") + elif can_continue == RequestResult.COOKIES_ERROR: + self.logger.info("COOKIES_ERROR, will not reset valid_csrf") + else: + self.logger.info("can continue, will reset valid_csrf") + self.valid_csrf = None + return can_continue + else: + return valid_csrf + # return RequestResult.CTRF_ERROR + + def getChTypeJsDataFromDict(self, _received_dict) -> JsDataPojo: + return JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'], + ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'], + ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'], + plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'], + plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'], + ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'], + rs_h=_received_dict['rs_h'], + rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd']) + + def generate_proxy(self): + _port = random.randint(40001, 49999) + _chosen_proxy = random.choice(self.proxy_to_use_list) + self.logger.info("generated port is {}".format(_port)) + _proxy_to_use = {} + _proxy_to_use["http"] = _chosen_proxy["http"].format(_port) + _proxy_to_use["https"] = _chosen_proxy["https"].format(_port) + return _proxy_to_use + + def on_message(self, ch, method, properties, body): + _message_count = self.cookiesPublisher.message_count() + self.logger.info("message count in queue is {}".format(_message_count)) + # prepare the contact list + if self.just_send: + self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list) + else: + self.contact_list = filter_contacts(self.contact_list) + # remove already booked contacts + random.shuffle(self.contact_list) + _received_object = body.decode("UTF-8") + self.logger.info(f" [x] Received {_received_object}") + step = 5 + _received_dict = json.loads(_received_object) + js_data = self.getChTypeJsDataFromDict(_received_dict) + _received_cookies = _received_dict["cookiesStr"] + if len(self.contact_list) > step: + _sub_list = self.contact_list[0:step] + result = None + for con in _sub_list: + with ThreadPoolExecutor(max_workers=step) as executor: + result = executor.submit(self.send_request, _received_cookies, _received_dict, js_data, self.logger, + con) + self.logger.info("result is: " + str(result.result())) + if result.result() == RequestResult.SUCCESS: + self.logger.info("Success for {}, with cookies{}".format(con.mail, _received_cookies)) + if result.result() == RequestResult.BLOCKED or result.result() == RequestResult.CTRF_ERROR: + ch.basic_ack(delivery_tag=method.delivery_tag) + + else: + ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) + else: + self.retrieve_invalidate_urls() + self.logger.info("empty list") + time.sleep(120) + self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag)) + ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True) + + def run(self): + self.logger.info(threading.currentThread().name + " starts") + self.set_up_connection() + self.listen_to_queue(self.on_message) + self.channel.start_consuming() + + def retrieve_invalidate_urls(self): + if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0: + self.logger.info("will retrieve validate urls") + time.sleep(30) + _mail_list = MONGO_STORE_MANAGER.get_destination_emails() + _mail_list_filtered = [] + for mail in _mail_list: + for _contact in self.list_to_retrieve_mails: + if _contact.mail == mail.mail: + _mail_list_filtered.append(mail) + self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered))) + find_links_to_validate_from_mail_list(_mail_list_filtered) + self.already_read_emails = True + else: + self.logger.info("already read emails, will not retrieve validate urls") diff --git a/request_sender.py b/request_sender.py index 69470be..412bb79 100644 --- a/request_sender.py +++ b/request_sender.py @@ -33,7 +33,7 @@ def filter_contacts(_contact_list: list) -> list: for booked in already_sent_contacts: if contact.mail == booked.email: _to_add = False - #如果已经收到链接了,就不要再请求 + # 如果已经收到链接了,就不要再请求 for link_to_validate in _link_to_validate_list: if contact.mail == link_to_validate.email: logger.info("{}: link already received".format(contact.mail)) @@ -69,11 +69,14 @@ def send_appointment_request(message_queue_name, _contact_list): receiver.run() +def start(contact_list_file): + pass + if __name__ == '__main__': - contacts_file_path = '~/Desktop/contact_list_2024-03-23.xlsx' + contacts_file_path = '~/Desktop/contact_list_2024-04-05-2.xlsx' _contact_list = read_contacts(contacts_file_path) _contact_list_to_book = filter_contacts(_contact_list) - _segment_number = 5 + _segment_number = 10 logger.info("{} contacts to book".format(len(_contact_list_to_book))) last_thread = None for i in range(0, _segment_number): diff --git a/workers/proxies_constants.py b/workers/proxies_constants.py index faea1d8..949d3f1 100644 --- a/workers/proxies_constants.py +++ b/workers/proxies_constants.py @@ -71,5 +71,5 @@ DE_PROXY_RES = { # PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE] # PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2] # PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3] -PROXY_LIST_FR = [FR_PROXY_MOB_OXY] +PROXY_LIST_FR = [FR_PROXY_RES_OXY] MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY]