# Source: appointment_request/workers/link_validator.py (184 lines, 8.3 KiB, Python)

import json
import random
import threading
import time
from http.cookies import SimpleCookie
import pika
import requests
from db.mongo_manager import MONGO_STORE_MANAGER
from models.LinkPojo import LinkPojo
from models.result_pojo import RequestResult
from queue_message.CookiesPublisher import CookiesPublisher, REQUEST_DATA_QUEUE_TEST, TEST_QUEUE, SHARED_OBJECT
from queue_message.appointmentrequestsender import QUEUE_HOST, REQUEST_DATA_QUEUE, credentials
from workers.proxies_constants import PROXY_LIST_FR
# Marker text searched for in the response body by LinkValidator.send_request; when present,
# the link is recorded with is_duplicated=True. (French: "An appointment request has already
# been registered with these contact details".)
DOUBLE_MESSAGE = "Une demande de rendez-vous a déjà été enregistrée avec ces coordonnées"
class LinkValidator(threading.Thread):
    """Queue consumer that requests links to validate using cookies received from a queue.

    Every message consumed from ``queue_to_listen`` carries a cookie string, either as the
    raw message body or inside a JSON object under the ``cookiesStr`` key (with the
    user agent under ``ua``). For each message the validator reloads the links from
    ``MONGO_STORE_MANAGER``, keeps those matching ``ip_country``, requests them one by one,
    records recognised outcomes in Mongo and republishes the refreshed cookies through
    ``cookiesPublisher``.

    The class derives from ``threading.Thread`` but defines no ``run`` override here; the
    caller visible in this file drives it synchronously through ``set_up_connection`` and
    ``listen_to_queue``.
    """

    # User agent sent when the received message does not provide one.
    _DEFAULT_USER_AGENT = ('Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 '
                           '(KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36')
    # Text in the response body that marks a successfully registered request.
    _SUCCESS_MESSAGE = "Votre demande de rendez-vous Maroquinerie a bien été enregistrée"

    def __init__(self, link_to_validate_list: list,
                 cookiesPublisher: CookiesPublisher, proxy_to_use, queue_to_listen=REQUEST_DATA_QUEUE,
                 ip_country="FR", limit=40):
        """Store the configuration and pre-filter the links by ``ip_country``.

        Args:
            link_to_validate_list: Link objects exposing ``ip_country``, ``url`` and ``email``.
            cookiesPublisher: Publisher used to push refreshed cookies back to a queue and
                to read the current message count.
            proxy_to_use: Value passed as ``proxies=`` to ``requests.get``.
            queue_to_listen: Name of the queue to consume from.
            ip_country: Only links whose ``ip_country`` equals this value are kept.
            limit: Minimum message count (as reported by ``cookiesPublisher``) required
                before links are processed.
        """
        super().__init__()
        self.cookie = SimpleCookie()
        # Initialised here so send_request() cannot hit AttributeError when it is called
        # before the first queue message has set the cookie string.
        self.cookie_str = ""
        self.cookiesPublisher = cookiesPublisher
        self.link_to_validate_list = link_to_validate_list
        self.queue_to_listen = queue_to_listen
        self.ip_country = ip_country
        self.filter_with_ip_country()
        self.proxy_to_use = proxy_to_use
        self.limit = limit

    def set_up_connection(self):
        """Open a blocking connection to ``QUEUE_HOST`` on port 5672 and create a channel."""
        self.connection = pika.BlockingConnection(
            pika.ConnectionParameters(host=QUEUE_HOST, port=5672, credentials=credentials))
        self.channel = self.connection.channel()

    def listen_to_queue(self, callback):
        """Consume ``queue_to_listen`` with manual acks, one unacked message at a time.

        Blocks in ``start_consuming``; ``set_up_connection`` must have been called first.
        """
        self.channel.basic_qos(prefetch_count=1)
        self.channel.basic_consume(queue=self.queue_to_listen, auto_ack=False, on_message_callback=callback)
        self.channel.start_consuming()

    def send_request(self, linkPojo: LinkPojo, _received_dict=None) -> RequestResult:
        """Request ``linkPojo.url`` with the current cookies and classify the response.

        Args:
            linkPojo: Link to request; its ``url`` is used as both target and Referer.
            _received_dict: Optional decoded JSON message. When given, its ``ua`` value is
                used as User-Agent and, on success, its ``cookiesStr`` is updated and the
                whole dict is republished as JSON.

        Returns:
            ``RequestResult.SUCCESS`` when the body contains the success or duplicate
            marker, ``RequestResult.UNKNOWN`` for any other 200 body,
            ``RequestResult.BLOCKED`` for a non-200 status and
            ``RequestResult.PROXY_ERROR`` when any exception is raised while processing.
        """
        _ua = self._DEFAULT_USER_AGENT
        if _received_dict is not None:
            _ua = _received_dict['ua']
        self.cookie.load(self.cookie_str)
        headers = {
            'User-Agent': _ua,
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Cache-Control': 'max-age=0',
            'Referer': linkPojo.url,
            'Cookie': self.cookie_str,
            'Sec-Fetch-Mode': 'navigate',
            'Host': 'rendezvousparis.hermes.com',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'fr-FR,fr;q=0.6'}
        print(self.proxy_to_use)
        print("received cookie is " + str(self.cookie_str))
        try:
            print("will send request with ua {}".format(_ua))
            print("will send request with cookie {}".format(self.cookie_str))
            # NOTE(review): verify=False disables TLS certificate verification; confirm this
            # is really required for the proxies in use.
            response = requests.get(url=linkPojo.url, headers=headers, verify=False, proxies=self.proxy_to_use,
                                    timeout=15)
            print(response.status_code)
            if response.status_code != 200:
                return RequestResult.BLOCKED
            _content = response.text
            print(response.text)
            if self._SUCCESS_MESSAGE in _content:
                print(response.url)
                MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo)
                self._publish_refreshed_cookies(response, _received_dict)
                return RequestResult.SUCCESS
            if DOUBLE_MESSAGE in _content:
                print(response.url)
                MONGO_STORE_MANAGER.link_validated_for_result(response.url, linkPojo, is_duplicated=True)
                # NOTE(review): this branch has always published the raw cookie string, even
                # when a JSON message was received; kept as-is — confirm whether the JSON
                # envelope should be republished here too.
                self._publish_refreshed_cookies(response, None)
                return RequestResult.SUCCESS
            return RequestResult.UNKNOWN
        except Exception as error:
            # Best-effort boundary: any failure is reported as a proxy error so the
            # consumer keeps running.
            print(error)
            return RequestResult.PROXY_ERROR

    def _publish_refreshed_cookies(self, response, envelope=None):
        """Merge the response's Set-Cookie values into the jar, then publish and store them.

        A response without a Set-Cookie header no longer raises ``KeyError`` (which used to
        turn an already recorded validation into ``PROXY_ERROR``); the current cookies are
        republished unchanged instead.

        Args:
            response: The ``requests`` response whose headers are inspected.
            envelope: Optional dict; when given, its ``cookiesStr`` is updated and the dict
                is published as JSON, otherwise the bare cookie string is published.
        """
        _cookies_to_set = response.headers.get('set-cookie')
        if _cookies_to_set:
            self.cookie.load(_cookies_to_set)
        new_cookies_str = "".join(
            "{}={};".format(name, morsel.value) for name, morsel in self.cookie.items())
        print("will publish to queue {}".format(new_cookies_str))
        if envelope is not None:
            envelope['cookiesStr'] = new_cookies_str
            body_json = json.dumps(envelope)
            print("body in json:{}".format(body_json))
            self.cookiesPublisher.publish_body(body_json)
        else:
            self.cookiesPublisher.publish_body(new_cookies_str)
        self.cookie_str = new_cookies_str

    def on_message(self, ch, method, properties, body):
        """Handle one queue message: load its cookies and request every matching link.

        The message is acked after the links were processed (or processing stopped on
        ``BLOCKED``). When there are no links, or the publisher reports fewer than
        ``limit`` messages, the handler waits 60 seconds and rejects the message with
        ``requeue=True``.
        """
        print(f" [x] Received {body}")
        _message_in_queue_count = self.cookiesPublisher.message_count()
        print("message count in queue is {}".format(_message_in_queue_count))
        self.link_to_validate_list = MONGO_STORE_MANAGER.get_links_to_validate()
        self.filter_with_ip_country()
        _received_object = body.decode("UTF-8")
        _received_dict = None
        # Bodies containing "glrd" are treated as JSON objects carrying 'cookiesStr' and 'ua';
        # anything else is taken as the raw cookie string.
        if "glrd" in _received_object:
            _received_dict = json.loads(_received_object)
            _received_cookies = _received_dict["cookiesStr"]
        else:
            _received_cookies = _received_object
        self.cookie_str = _received_cookies
        random.shuffle(self.link_to_validate_list)
        if self.link_to_validate_list and _message_in_queue_count >= self.limit:
            print("links number is {}".format(len(self.link_to_validate_list)))
            for con in self.link_to_validate_list:
                print(con.email)
                self.proxy_to_use = random.choice(PROXY_LIST_FR)
                can_continue = self.send_request(con, _received_dict)
                if can_continue == RequestResult.BLOCKED:
                    print("cannot continue, blocked, then skip")
                    break
                # Random pause between consecutive requests.
                time.sleep(random.randint(2, 5))
                print("can continue, continue")
            print("will ack")
            ch.basic_ack(delivery_tag=method.delivery_tag)
        else:
            # Also reached when the message count is below `limit`, not only on an empty list.
            print("empty list, no need to ack")
            # NOTE(review): time.sleep blocks the consumer callback for the whole minute;
            # confirm the broker heartbeat settings tolerate this.
            time.sleep(60)
            ch.basic_reject(delivery_tag=method.delivery_tag, requeue=True)

    def filter_with_ip_country(self):
        """Keep only the links whose ``ip_country`` equals ``self.ip_country``."""
        self.link_to_validate_list = [
            _link for _link in self.link_to_validate_list
            if _link.ip_country == self.ip_country
        ]
def validate_with_FR_ip():
    """Run a blocking LinkValidator on TEST_QUEUE for links whose ip_country is "FR".

    Loads the links from Mongo, connects a CookiesPublisher on TEST_QUEUE, picks a random
    proxy from PROXY_LIST_FR and starts consuming with ``limit=0``. Does not return while
    the consumer is running.
    """
    pending_links = MONGO_STORE_MANAGER.get_links_to_validate()
    publisher = CookiesPublisher(queue_name=TEST_QUEUE)
    publisher.set_up_connection()

    print("filter links with ip_country")
    fr_links = [link for link in pending_links if link.ip_country == "FR"]
    for link in fr_links:
        print(link.ip_country)

    validator = LinkValidator(
        link_to_validate_list=fr_links,
        cookiesPublisher=publisher,
        proxy_to_use=random.choice(PROXY_LIST_FR),
        queue_to_listen=TEST_QUEUE,
        ip_country="FR",
        limit=0,
    )
    print("will connect to queue")
    validator.set_up_connection()
    validator.listen_to_queue(validator.on_message)
# Script entry point: start the FR link-validation consumer when this file is run directly.
if __name__ == '__main__':
    validate_with_FR_ip()