Break out of the contact loop when get_csrf is blocked

This commit is contained in:
2024-04-03 16:25:48 +02:00
parent 7c23745679
commit 8f3b35f55f
2 changed files with 16 additions and 6 deletions
+8 -2
View File
@@ -127,9 +127,15 @@ class AppointmentRequestSender(threading.Thread):
for con in self.contact_list: for con in self.contact_list:
self.logger.info(con.mail) self.logger.info(con.mail)
if self.valid_csrf is None: if self.valid_csrf is None:
self.valid_csrf = captchaResultGetter.get_csrf( csrf_result = captchaResultGetter.get_csrf(
proxy_to_use=_proxy_to_use, js_data=js_data, proxy_to_use=_proxy_to_use, js_data=js_data,
cookie=_received_cookies) cookie=_received_cookies)
if isinstance(csrf_result, str):
self.valid_csrf = csrf_result
else:
self.logger.info("csrf is {}".format(csrf_result))
if csrf_result == RequestResult.BLOCKED:
break
_new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data, _new_cookies = captchaResultGetter.get_valid_ch_cookie(sender.proxy_to_use, js_data,
old_valid_cookie=_received_cookies) old_valid_cookie=_received_cookies)
if _new_cookies is not None: if _new_cookies is not None:
@@ -181,7 +187,7 @@ class AppointmentRequestSender(threading.Thread):
else: else:
can_continue = RequestResult.COOKIES_ERROR can_continue = RequestResult.COOKIES_ERROR
if can_continue == RequestResult.BLOCKED: if can_continue == RequestResult.BLOCKED:
self.logger.info("cannot continue, valid_csrf is " + str(self.valid_csrf)) self.logger.info("BLOCKED, valid_csrf is " + str(self.valid_csrf))
break break
elif can_continue == RequestResult.PROXY_ERROR: elif can_continue == RequestResult.PROXY_ERROR:
self.logger.info("PROXY_ERROR, will not reset valid_csrf") self.logger.info("PROXY_ERROR, will not reset valid_csrf")
+8 -4
View File
@@ -6,6 +6,7 @@ import requests
from models.jsdata_le_pojo import JsDataLeTypePojo from models.jsdata_le_pojo import JsDataLeTypePojo
from models.jsdata_pojo import JsDataPojo from models.jsdata_pojo import JsDataPojo
from models.result_pojo import RequestResult
from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \ from utils.get_only_datadome_cookies import get_datadome_cookies, get_app_cookies, get_lang_cookies, \
retain_only_dataome_cookies retain_only_dataome_cookies
from workers.proxies_constants import PROXY_LIST_FR from workers.proxies_constants import PROXY_LIST_FR
@@ -24,7 +25,7 @@ class CaptchaResultGetter:
self.cookie_str = 'datadome=5Nq~NEP_qQSHC0g_lZNnZmEv36J8gVV~rpZ329xmCkTq2~H3meIoXr4h_b988qB2XW5Te7iEGsvq8BzA5KeFupyrZFh4kgrDyl8hT2UymSByKHzAcDaNIBPDsRu2g_KG; Max-Age=31536000; Domain=.hermes.com; Path=/; Secure; SameSite=None' self.cookie_str = 'datadome=5Nq~NEP_qQSHC0g_lZNnZmEv36J8gVV~rpZ329xmCkTq2~H3meIoXr4h_b988qB2XW5Te7iEGsvq8BzA5KeFupyrZFh4kgrDyl8hT2UymSByKHzAcDaNIBPDsRu2g_KG; Max-Age=31536000; Domain=.hermes.com; Path=/; Secure; SameSite=None'
pass pass
def get_csrf(self, proxy_to_use, js_data: JsDataPojo, cookie: str = None) -> Union[str, None]: def get_csrf(self, proxy_to_use, js_data: JsDataPojo, cookie: str = None) -> Union[str, RequestResult]:
if cookie is not None: if cookie is not None:
headers = {'Content-Type': 'application/x-www-form-urlencoded', headers = {'Content-Type': 'application/x-www-form-urlencoded',
'User-Agent': js_data.ua, 'User-Agent': js_data.ua,
@@ -49,13 +50,16 @@ class CaptchaResultGetter:
timeout=15) timeout=15)
print(response.status_code) print(response.status_code)
if response.status_code == 200: if response.status_code == 200:
# print(response.text) print(response.text)
print(response.url)
return self.extract_csrf_from_html(response.text) return self.extract_csrf_from_html(response.text)
elif response.status_code == 403:
return RequestResult.BLOCKED
else: else:
return None print(response.text)
return RequestResult.UNKNOWN
except Exception as error: except Exception as error:
print(error) print(error)
return RequestResult.PROXY_ERROR
def extract_csrf_from_html(self, html: str) -> Union[str, None]: def extract_csrf_from_html(self, html: str) -> Union[str, None]:
result = re.findall(r'_csrf" value="[A-Za-z0-9-_]+', html) result = re.findall(r'_csrf" value="[A-Za-z0-9-_]+', html)