break when the get_csrf is blocked
This commit is contained in:
@@ -7,3 +7,4 @@ class RequestResult(Enum):
|
|||||||
SUCCESS = "SUCCESS"
|
SUCCESS = "SUCCESS"
|
||||||
COOKIES_ERROR = "COOKIES_ERROR"
|
COOKIES_ERROR = "COOKIES_ERROR"
|
||||||
UNKNOWN = "UNKNOWN"
|
UNKNOWN = "UNKNOWN"
|
||||||
|
CTRF_ERROR = "CTRF_ERROR"
|
||||||
|
|||||||
@@ -0,0 +1,87 @@
|
|||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
from db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
|
from excel_reader import read_contacts
|
||||||
|
from models.contact_pojo import ContactPojo
|
||||||
|
from queue_message.CookiesPublisher import CookiesPublisher, SHARED_OBJECT, TEST_QUEUE
|
||||||
|
from queue_message.appointmentrequestsender import AppointmentRequestSender
|
||||||
|
from queue_message.parallel_requestsender import ParallelRequestSender
|
||||||
|
from utiles import is_time_between
|
||||||
|
from utils.AppLogging import init_logger
|
||||||
|
from workers.proxies_constants import MOBILE_PROXY_LIST_FR
|
||||||
|
|
||||||
|
IPFIY = 'http://api.ipify.org'
|
||||||
|
NGROK_TEST = "https://bcc6-193-164-156-53.ngrok-free.app"
|
||||||
|
|
||||||
|
|
||||||
|
def is_already_sent(contact: ContactPojo) -> bool:
|
||||||
|
already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
|
for required_contact in already_sent_contacts:
|
||||||
|
if contact.mail == required_contact.email:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def filter_contacts(_contact_list: list) -> list:
|
||||||
|
already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
|
_link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate()
|
||||||
|
_contact_list_to_book = []
|
||||||
|
for contact in _contact_list:
|
||||||
|
_to_add = True
|
||||||
|
for booked in already_sent_contacts:
|
||||||
|
if contact.mail == booked.email:
|
||||||
|
_to_add = False
|
||||||
|
# 如果已经收到链接了,就不要再请求
|
||||||
|
for link_to_validate in _link_to_validate_list:
|
||||||
|
if contact.mail == link_to_validate.email:
|
||||||
|
logger.info("{}: link already received".format(contact.mail))
|
||||||
|
_to_add = False
|
||||||
|
if _to_add:
|
||||||
|
_contact_list_to_book.append(contact)
|
||||||
|
|
||||||
|
return _contact_list_to_book
|
||||||
|
|
||||||
|
|
||||||
|
def is_open():
|
||||||
|
return is_time_between(datetime.time(10, 30), datetime.time(19, 00))
|
||||||
|
|
||||||
|
|
||||||
|
count = 0
|
||||||
|
init_logger()
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
logger.addHandler(logging.StreamHandler(stream=sys.stdout))
|
||||||
|
|
||||||
|
|
||||||
|
def send_appointment_request(message_queue_name, _contact_list):
|
||||||
|
global count
|
||||||
|
count = count + 1
|
||||||
|
for _contact in _contact_list:
|
||||||
|
logger.info(_contact)
|
||||||
|
_cookiesPublisher = CookiesPublisher(queue_name=message_queue_name)
|
||||||
|
_cookiesPublisher.set_up_connection()
|
||||||
|
receiver = ParallelRequestSender(sub_contact_list=_contact_list, proxy_to_use_list=MOBILE_PROXY_LIST_FR,
|
||||||
|
queue_name=message_queue_name, just_send=True,
|
||||||
|
cookiesPublisher=_cookiesPublisher, logger=logger)
|
||||||
|
print("count is " + str(count))
|
||||||
|
receiver.run()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
contacts_file_path = '~/Desktop/31_03_to_test.xlsx'
|
||||||
|
_contact_list = read_contacts(contacts_file_path)[0:20]
|
||||||
|
_contact_list_to_book = filter_contacts(_contact_list)
|
||||||
|
_segment_number = 1
|
||||||
|
logger.info("{} contacts to book".format(len(_contact_list_to_book)))
|
||||||
|
last_thread = None
|
||||||
|
for i in range(0, _segment_number):
|
||||||
|
logger.info("segment is {}".format(i))
|
||||||
|
_step = int(len(_contact_list_to_book) / _segment_number)
|
||||||
|
_sublist = _contact_list_to_book[i * _step:_step * (i + 1)]
|
||||||
|
_thread1 = Thread(target=send_appointment_request, args=(TEST_QUEUE, _sublist))
|
||||||
|
last_thread = _thread1
|
||||||
|
_thread1.start()
|
||||||
|
last_thread.join()
|
||||||
@@ -0,0 +1,221 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from concurrent.futures.thread import ThreadPoolExecutor
|
||||||
|
|
||||||
|
import pika
|
||||||
|
|
||||||
|
from db.mongo_manager import MONGO_STORE_MANAGER
|
||||||
|
from mail.mail_reader_all_contacts import find_links_to_validate_from_mail_list
|
||||||
|
from models.ReserveResultPojo import ReserveResultPojo
|
||||||
|
from models.contact_pojo import ContactPojo
|
||||||
|
from models.jsdata_le_pojo import JsDataLeTypePojo
|
||||||
|
from models.jsdata_pojo import JsDataPojo
|
||||||
|
from models.result_pojo import RequestResult
|
||||||
|
from queue_message.CookiesPublisher import CookiesPublisher
|
||||||
|
from queue_message.appointmentrequestsender import filter_contacts, is_open
|
||||||
|
from utiles import is_time_between
|
||||||
|
from workers.captcha_result_getter import CaptchaResultGetter, HERMES_REGISTER
|
||||||
|
from workers.sender import Sender
|
||||||
|
|
||||||
|
QUEUE_HOST = "appointment.lpaconsulting.fr"
|
||||||
|
REQUEST_DATA_QUEUE = 'REQUEST_DATA'
|
||||||
|
credentials = pika.PlainCredentials('appointment', 'ZyuhJZ2xEYWhElhpJjy7YEpZGZwNYJz2fHIu')
|
||||||
|
|
||||||
|
|
||||||
|
def is_already_sent(contact: ContactPojo) -> bool:
|
||||||
|
already_sent_contacts = MONGO_STORE_MANAGER.get_all_successful_items_for_day()
|
||||||
|
for required_contact in already_sent_contacts:
|
||||||
|
if contact.mail == required_contact.email:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
class ParallelRequestSender(threading.Thread):
|
||||||
|
def __init__(self, sub_contact_list: list, proxy_to_use_list, logger, cookiesPublisher: CookiesPublisher,
|
||||||
|
just_send=False,
|
||||||
|
queue_name=REQUEST_DATA_QUEUE):
|
||||||
|
super().__init__()
|
||||||
|
self.connection = None
|
||||||
|
self.just_send = just_send
|
||||||
|
self.logger = logger
|
||||||
|
self.already_tried_contact_list = []
|
||||||
|
self.cookiesPublisher = cookiesPublisher
|
||||||
|
self.channel = None
|
||||||
|
self.valid_csrf = None
|
||||||
|
self.list_to_retrieve_mails = sub_contact_list
|
||||||
|
self.contact_list = sub_contact_list
|
||||||
|
self.queue_name = queue_name
|
||||||
|
self.proxy_to_use_list = proxy_to_use_list
|
||||||
|
self.already_read_emails = False
|
||||||
|
|
||||||
|
def set_up_connection(self):
|
||||||
|
self.connection = pika.BlockingConnection(
|
||||||
|
pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials))
|
||||||
|
self.channel = self.connection.channel()
|
||||||
|
|
||||||
|
def listen_to_queue(self, callback):
|
||||||
|
self.logger.info("listen to queue {}".format(self.queue_name))
|
||||||
|
self.channel.basic_qos(prefetch_count=1)
|
||||||
|
self.channel.basic_consume(queue=self.queue_name, auto_ack=False, on_message_callback=callback)
|
||||||
|
self.channel.start_consuming()
|
||||||
|
|
||||||
|
def send_request(self, _received_cookies, _received_dict, js_data: JsDataPojo, logger,
|
||||||
|
_contact) -> RequestResult:
|
||||||
|
_proxy_to_use = self.generate_proxy()
|
||||||
|
logger.info("send_request for contact: {}, cookies: {}".format(_contact.mail, _received_cookies))
|
||||||
|
logger.info("proxy to use is {}".format(_proxy_to_use))
|
||||||
|
sender = Sender(_received_cookies, cookiesPublisher=self.cookiesPublisher, received_dict=_received_dict,
|
||||||
|
proxy_to_use=_proxy_to_use, logger=logger)
|
||||||
|
# remove already sent contacts
|
||||||
|
if is_open():
|
||||||
|
captchaResultGetter = CaptchaResultGetter()
|
||||||
|
_new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data,
|
||||||
|
old_valid_cookie=_received_cookies)
|
||||||
|
# self.contact_list = filter_contacts(self.contact_list)
|
||||||
|
logger.info(_contact.mail)
|
||||||
|
valid_csrf = captchaResultGetter.get_csrf(
|
||||||
|
proxy_to_use=_proxy_to_use, js_data=js_data,
|
||||||
|
cookie=_new_cookies)
|
||||||
|
if isinstance(valid_csrf, str):
|
||||||
|
if _new_cookies is not None:
|
||||||
|
logger.info("new cookie is " + _new_cookies)
|
||||||
|
# m_s_c = f.scroll
|
||||||
|
m_s_c = random.randint(0, 3)
|
||||||
|
m_c_c = random.randint(3, 5) # click count
|
||||||
|
m_m_c = random.randint(3, 5) # move count
|
||||||
|
m_cm_r = m_c_c / m_m_c
|
||||||
|
m_ms_r = random.randint(-1, 1)
|
||||||
|
|
||||||
|
js_le_data = JsDataLeTypePojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'],
|
||||||
|
hc=_received_dict['hc'],
|
||||||
|
ua=_received_dict['ua'], br_oh=_received_dict['br_oh'],
|
||||||
|
br_ow=_received_dict['br_ow'],
|
||||||
|
ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'],
|
||||||
|
pr=_received_dict['pr'],
|
||||||
|
plg=_received_dict['plg'], br_h=_received_dict['br_h'],
|
||||||
|
br_w=_received_dict['br_w'],
|
||||||
|
plu=_received_dict['plu'], vnd=_received_dict['vnd'],
|
||||||
|
dvm=_received_dict['dvm'],
|
||||||
|
ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'],
|
||||||
|
rs_h=_received_dict['rs_h'],
|
||||||
|
rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'],
|
||||||
|
m_s_c=m_s_c, m_m_c=m_m_c, m_c_c=m_c_c,
|
||||||
|
m_cm_r=m_cm_r, m_ms_r=m_ms_r)
|
||||||
|
time.sleep(random.randint(1, 4))
|
||||||
|
_new_le_cookies = captchaResultGetter.get_le_valid_cookie(proxy_to_use=_proxy_to_use,
|
||||||
|
js_le_type_data=js_le_data,
|
||||||
|
old_valid_cookie=_new_cookies)
|
||||||
|
if _new_le_cookies is not None:
|
||||||
|
# self.logger.info("new le type cookie is " + _new_le_cookies)
|
||||||
|
sender.cookie_str = _new_le_cookies
|
||||||
|
time.sleep(random.randint(1, 3))
|
||||||
|
self.already_tried_contact_list.append(_contact)
|
||||||
|
can_continue = sender.send_request(HERMES_REGISTER, js_data, _contact, csrf=valid_csrf)
|
||||||
|
if can_continue == RequestResult.SUCCESS:
|
||||||
|
# 让服务器读取成功的约会
|
||||||
|
try:
|
||||||
|
self.logger.info("try to remove success contact from list to retrieve mails")
|
||||||
|
self.list_to_retrieve_mails.remove(_contact)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.info(
|
||||||
|
"exception while remove success contact from list to retrieve mails")
|
||||||
|
print(e)
|
||||||
|
else:
|
||||||
|
can_continue = RequestResult.COOKIES_ERROR
|
||||||
|
else:
|
||||||
|
can_continue = RequestResult.COOKIES_ERROR
|
||||||
|
if can_continue == RequestResult.BLOCKED:
|
||||||
|
self.logger.info("cannot continue, we are blocked " + str(self.valid_csrf))
|
||||||
|
elif can_continue == RequestResult.PROXY_ERROR:
|
||||||
|
self.logger.info("PROXY_ERROR, will not reset valid_csrf")
|
||||||
|
elif can_continue == RequestResult.COOKIES_ERROR:
|
||||||
|
self.logger.info("COOKIES_ERROR, will not reset valid_csrf")
|
||||||
|
else:
|
||||||
|
self.logger.info("can continue, will reset valid_csrf")
|
||||||
|
self.valid_csrf = None
|
||||||
|
return can_continue
|
||||||
|
else:
|
||||||
|
return valid_csrf
|
||||||
|
# return RequestResult.CTRF_ERROR
|
||||||
|
|
||||||
|
def getChTypeJsDataFromDict(self, _received_dict) -> JsDataPojo:
|
||||||
|
return JsDataPojo(glrd=_received_dict['glrd'], glvd=_received_dict['glvd'], hc=_received_dict['hc'],
|
||||||
|
ua=_received_dict['ua'], br_oh=_received_dict['br_oh'], br_ow=_received_dict['br_ow'],
|
||||||
|
ars_h=_received_dict['ars_h'], ars_w=_received_dict['ars_w'], pr=_received_dict['pr'],
|
||||||
|
plg=_received_dict['plg'], br_h=_received_dict['br_h'], br_w=_received_dict['br_w'],
|
||||||
|
plu=_received_dict['plu'], vnd=_received_dict['vnd'], dvm=_received_dict['dvm'],
|
||||||
|
ts_mtp=_received_dict['ts_mtp'], eva=_received_dict['eva'],
|
||||||
|
rs_h=_received_dict['rs_h'],
|
||||||
|
rs_w=_received_dict['rs_w'], rs_cd=_received_dict['rs_cd'])
|
||||||
|
|
||||||
|
def generate_proxy(self):
|
||||||
|
_port = random.randint(40001, 49999)
|
||||||
|
_chosen_proxy = random.choice(self.proxy_to_use_list)
|
||||||
|
self.logger.info("generated port is {}".format(_port))
|
||||||
|
_proxy_to_use = {}
|
||||||
|
_proxy_to_use["http"] = _chosen_proxy["http"].format(_port)
|
||||||
|
_proxy_to_use["https"] = _chosen_proxy["https"].format(_port)
|
||||||
|
return _proxy_to_use
|
||||||
|
|
||||||
|
def on_message(self, ch, method, properties, body):
|
||||||
|
_message_count = self.cookiesPublisher.message_count()
|
||||||
|
self.logger.info("message count in queue is {}".format(_message_count))
|
||||||
|
# prepare the contact list
|
||||||
|
if self.just_send:
|
||||||
|
self.contact_list = filter_contacts(self.contact_list, self.already_tried_contact_list)
|
||||||
|
else:
|
||||||
|
self.contact_list = filter_contacts(self.contact_list)
|
||||||
|
# remove already booked contacts
|
||||||
|
random.shuffle(self.contact_list)
|
||||||
|
_received_object = body.decode("UTF-8")
|
||||||
|
self.logger.info(f" [x] Received {_received_object}")
|
||||||
|
step = 5
|
||||||
|
_received_dict = json.loads(_received_object)
|
||||||
|
js_data = self.getChTypeJsDataFromDict(_received_dict)
|
||||||
|
_received_cookies = _received_dict["cookiesStr"]
|
||||||
|
if len(self.contact_list) > step:
|
||||||
|
_sub_list = self.contact_list[0:step]
|
||||||
|
result = None
|
||||||
|
for con in _sub_list:
|
||||||
|
with ThreadPoolExecutor(max_workers=step) as executor:
|
||||||
|
result = executor.submit(self.send_request, _received_cookies, _received_dict, js_data, self.logger,
|
||||||
|
con)
|
||||||
|
self.logger.info("result is: " + str(result.result()))
|
||||||
|
if result.result() == RequestResult.SUCCESS:
|
||||||
|
self.logger.info("Success for {}, with cookies{}".format(con.mail, _received_cookies))
|
||||||
|
if result.result() == RequestResult.BLOCKED or result.result() == RequestResult.CTRF_ERROR:
|
||||||
|
ch.basic_ack(delivery_tag=method.delivery_tag)
|
||||||
|
|
||||||
|
else:
|
||||||
|
ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)
|
||||||
|
else:
|
||||||
|
self.retrieve_invalidate_urls()
|
||||||
|
self.logger.info("empty list")
|
||||||
|
time.sleep(120)
|
||||||
|
self.logger.info("will basic_reject method.delivery_tag: " + str(method.delivery_tag))
|
||||||
|
ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
self.logger.info(threading.currentThread().name + " starts")
|
||||||
|
self.set_up_connection()
|
||||||
|
self.listen_to_queue(self.on_message)
|
||||||
|
self.channel.start_consuming()
|
||||||
|
|
||||||
|
def retrieve_invalidate_urls(self):
|
||||||
|
if not self.already_read_emails and len(self.list_to_retrieve_mails) > 0:
|
||||||
|
self.logger.info("will retrieve validate urls")
|
||||||
|
time.sleep(30)
|
||||||
|
_mail_list = MONGO_STORE_MANAGER.get_destination_emails()
|
||||||
|
_mail_list_filtered = []
|
||||||
|
for mail in _mail_list:
|
||||||
|
for _contact in self.list_to_retrieve_mails:
|
||||||
|
if _contact.mail == mail.mail:
|
||||||
|
_mail_list_filtered.append(mail)
|
||||||
|
self.logger.info("will call find_links_to_validate_from_mail_list, size = " + str(len(_mail_list_filtered)))
|
||||||
|
find_links_to_validate_from_mail_list(_mail_list_filtered)
|
||||||
|
self.already_read_emails = True
|
||||||
|
else:
|
||||||
|
self.logger.info("already read emails, will not retrieve validate urls")
|
||||||
+6
-3
@@ -33,7 +33,7 @@ def filter_contacts(_contact_list: list) -> list:
|
|||||||
for booked in already_sent_contacts:
|
for booked in already_sent_contacts:
|
||||||
if contact.mail == booked.email:
|
if contact.mail == booked.email:
|
||||||
_to_add = False
|
_to_add = False
|
||||||
#如果已经收到链接了,就不要再请求
|
# 如果已经收到链接了,就不要再请求
|
||||||
for link_to_validate in _link_to_validate_list:
|
for link_to_validate in _link_to_validate_list:
|
||||||
if contact.mail == link_to_validate.email:
|
if contact.mail == link_to_validate.email:
|
||||||
logger.info("{}: link already received".format(contact.mail))
|
logger.info("{}: link already received".format(contact.mail))
|
||||||
@@ -69,11 +69,14 @@ def send_appointment_request(message_queue_name, _contact_list):
|
|||||||
receiver.run()
|
receiver.run()
|
||||||
|
|
||||||
|
|
||||||
|
def start(contact_list_file):
|
||||||
|
pass
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
contacts_file_path = '~/Desktop/contact_list_2024-03-23.xlsx'
|
contacts_file_path = '~/Desktop/contact_list_2024-04-05-2.xlsx'
|
||||||
_contact_list = read_contacts(contacts_file_path)
|
_contact_list = read_contacts(contacts_file_path)
|
||||||
_contact_list_to_book = filter_contacts(_contact_list)
|
_contact_list_to_book = filter_contacts(_contact_list)
|
||||||
_segment_number = 5
|
_segment_number = 10
|
||||||
logger.info("{} contacts to book".format(len(_contact_list_to_book)))
|
logger.info("{} contacts to book".format(len(_contact_list_to_book)))
|
||||||
last_thread = None
|
last_thread = None
|
||||||
for i in range(0, _segment_number):
|
for i in range(0, _segment_number):
|
||||||
|
|||||||
@@ -71,5 +71,5 @@ DE_PROXY_RES = {
|
|||||||
# PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE]
|
# PROXY_LIST = [FR_PROXY_MOBILE, FR_PROXY_RES, DE_PROXY_RES, DE_PROXY_MOBILE, ES_PROXY_MOBILE, IT_PROXY_MOBILE]
|
||||||
# PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2]
|
# PROXY_LIST_FR = [FR_PROXY_MOBILE_2, FR_PROXY_MOBILE, FR_PROXY_RES, FR_PROXY_RES_4, FR_PROXY_RES_2]
|
||||||
# PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3]
|
# PROXY_LIST_FR = [FR_PROXY_RES_OXY, FR_PROXY_MOBILE_3]
|
||||||
PROXY_LIST_FR = [FR_PROXY_MOB_OXY]
|
PROXY_LIST_FR = [FR_PROXY_RES_OXY]
|
||||||
MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY]
|
MOBILE_PROXY_LIST_FR = [FR_PROXY_MOB_OXY_STICKY]
|
||||||
|
|||||||
Reference in New Issue
Block a user