From cf2af4bf7bd2dcc1b40fa451ba693b25c0625418 Mon Sep 17 00:00:00 2001
From: PAN Lei
Date: Mon, 28 Mar 2022 14:17:12 +0200
Subject: [PATCH] can run playwright in multi-thread

---
Reviewer notes (commentary only: text between the "---" separator above and
the first "diff --git" line is not part of the commit).

Formatting: the line structure of this patch was restored from a copy that
had been collapsed onto a few long lines. Line breaks and indentation were
rebuilt from the hunk headers (the per-hunk line counts and the 73/58
diffstat all match) and from Python block structure. The exact indentation
of the five commented-out lines in the last hunk could not be recovered and
is a best guess; it does not affect behaviour.

What the change does:
  * Replaces the module-level functions check_result_page(), run() and
    load_page() with a Worker class (methods load_page and run).
  * Adds Tls, a threading.local subclass whose __init__ calls
    sync_playwright().start() and stores the result as self.playwright.
    Worker.tls is a class attribute holding one Tls() object.
  * Worker.run() takes a ReserveResultPojo and the collection, and writes
    the result itself with
    collection.document(reserve_pojo.id).update({u'accepted': status.name})
    instead of returning a ResultEnum.
  * The __main__ block submits one Worker.run per streamed appointment to a
    ThreadPoolExecutor(max_workers=6).

Issues found in review (not fixed here, so the diff still matches the
recorded commit):
  1. The JavaScript passed to page.add_init_script() never closes the
     "Object.defineProperty(" call, so it is not valid JavaScript. It is
     also written as an arrow-function expression that nothing invokes.
     Suggested script text:
     Object.defineProperty(navigator, 'webdriver', {get: () => undefined})
     (The same line exists in the removed load_page(); the defect predates
     this change.)
  2. The "if reserve_pojo.accepted is None" filter is now commented out, so
     every streamed appointment is re-checked and its 'accepted' field is
     overwritten. Confirm this is intended.
  3. get_all_successful_items_for_day("2022-03-16") hard-codes one date.
  4. The Future objects returned by executor.submit() are discarded, so an
     exception raised inside Worker.run() is never reported. Keep the
     futures and call result() on each.
  5. The "while content is None" loop retries without any limit, and
     browser.close() is not inside try/finally, so an exception after
     launch() leaves the browser open.
  6. Tls() is created in the Worker class body, so a Playwright instance is
     started when the class is defined, and again in each thread that first
     touches Worker.tls. No stop() call is visible in these hunks.
  7. "import asyncio", "from playwright.async_api import async_playwright"
     and "task_list = list()" are added but not used in the visible hunks.

 check_results.py | 131 ++++++++++++++++++++++++++---------------------
 1 file changed, 73 insertions(+), 58 deletions(-)

diff --git a/check_results.py b/check_results.py
index b505276..0d959bc 100644
--- a/check_results.py
+++ b/check_results.py
@@ -1,7 +1,11 @@
+import asyncio
 import random
+import threading
+from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from typing import Union
 
+from playwright.async_api import async_playwright
 from playwright.sync_api import sync_playwright
 
 import params
@@ -22,28 +26,39 @@ class ResultEnum(Enum):
     PENDING = "PENDING"
 
 
-# check result with playright
-def check_result_page(url, phone_number: str) -> ResultEnum:
-    with sync_playwright() as p:
-        return run(p, url, phone_number)
+class Tls(threading.local):
+    def __init__(self) -> None:
+        self.playwright = sync_playwright().start()
+        print("Create playwright instance in Thread", threading.current_thread().name)
 
 
-def run(playwright, url, phone_number: str) -> ResultEnum:
-    browser = playwright.firefox.launch(headless=False)
-    url_to_check = url.replace("register/", "")
-    url_to_check = url_to_check + "?lang=fr"
-    content = None
-    random_id_number = str(phone_number)[1:len(str(phone_number))]
-    proxy_username = "panleicim-stc-fr-" + random_id_number
-    print("proxy_username is " + proxy_username)
-    proxy = {
-        "server": params.PROXY_SERVER,
-        "username": proxy_username,
-        "password": params.PROXY_PASSWORD
-    }
-    while content is None:
-        content = load_page(browser, url_to_check, proxy)
-        random_id_number = get_random_id_number_for_proxy()
+class Worker:
+    tls = Tls()
+
+    def load_page(self, browser, url, proxy) -> Union[str, None]:
+        try:
+            firefox_user_agents = filter(lambda user_agent: "firefox" in user_agent.lower(), user_agent_list)
+            firefox_user_agents_list = list(firefox_user_agents)
+            user_agent = random.choice(firefox_user_agents_list)
+            page = browser.new_page(
+                user_agent=user_agent,
+                proxy=proxy)
+            page.add_init_script("""() => Object.defineProperty(navigator,'webdriver',{get: () => undefined}""")
+            page.goto(url, timeout=90000)
+            return page.content()
+        except Exception as error:
+            print(error)
+            return None
+
+    def run(self, reserve_pojo: ReserveResultPojo, collection):
+        print("Launched worker in ", threading.current_thread().name)
+        browser = self.tls.playwright.firefox.launch(headless=False)
+        url = reserve_pojo.url
+        phone_number = reserve_pojo.phone
+        url_to_check = url.replace("register/", "")
+        url_to_check = url_to_check + "?lang=fr"
+        content = None
+        random_id_number = str(phone_number)[1:len(str(phone_number))]
         proxy_username = "panleicim-stc-fr-" + random_id_number
         print("proxy_username is " + proxy_username)
         proxy = {
@@ -51,33 +66,30 @@ def run(playwright, url, phone_number: str) -> ResultEnum:
             "username": proxy_username,
             "password": params.PROXY_PASSWORD
         }
-    print(content)
-    browser.close()
-    if SORRY_SENTENCE in content:
-        print("status is REFUSED")
-        return ResultEnum.REFUSED
-    elif PENDING_SENTENCE in content:
-        print("status is PENDING")
-        return ResultEnum.PENDING
-    else:
-        print("status is ACCEPTED")
-        return ResultEnum.ACCEPTED
-
-
-def load_page(browser, url, proxy) -> Union[str, None]:
-    try:
-        firefox_user_agents = filter(lambda user_agent: "firefox" in user_agent.lower(), user_agent_list)
-        firefox_user_agents_list = list(firefox_user_agents)
-        user_agent = random.choice(firefox_user_agents_list)
-        page = browser.new_page(
-            user_agent=user_agent,
-            proxy=proxy)
-        page.add_init_script("""() => Object.defineProperty(navigator,'webdriver',{get: () => undefined}""")
-        page.goto(url, timeout=90000)
-        return page.content()
-    except Exception as error:
-        print(error)
-        return None
+        while content is None:
+            content = self.load_page(browser, url_to_check, proxy)
+            random_id_number = get_random_id_number_for_proxy()
+            proxy_username = "panleicim-stc-fr-" + random_id_number
+            print("proxy_username is " + proxy_username)
+            proxy = {
+                "server": params.PROXY_SERVER,
+                "username": proxy_username,
+                "password": params.PROXY_PASSWORD
+            }
+        print(content)
+        browser.close()
+        print("Stopped worker in ", threading.current_thread().name)
+        status = None
+        if SORRY_SENTENCE in content:
+            print("status is REFUSED")
+            status = ResultEnum.REFUSED
+        elif PENDING_SENTENCE in content:
+            print("status is PENDING")
+            status = ResultEnum.PENDING
+        else:
+            print("status is ACCEPTED")
+            status = ResultEnum.ACCEPTED
+        collection.document(reserve_pojo.id).update({u'accepted': status.name})
 
 
 # need to start at 21h00
@@ -85,15 +97,18 @@ if __name__ == '__main__':
     # get the list
     params.oracle_log_sender.send_log(msg="开始检查约会结果", subject=LOG_SUBJECT_EVENT, type=TYPE_EVENT_CHECK_RESULTS)
     db_manager = params.firebase_store_manager
-    collection = db_manager.get_all_successful_items()
+    collection = db_manager.get_all_successful_items_for_day("2022-03-16")
     count = 0
-    for appointment in collection.stream():
-        count = count + 1
-        reserve_pojo = ReserveResultPojo.from_firestore_dict(appointment.to_dict())
-        if reserve_pojo.accepted:
-            print("status is " + reserve_pojo.accepted)
-        if reserve_pojo.accepted is None:
-            #or ResultEnum.PENDING.value == reserve_pojo.accepted:
-            result = check_result_page(reserve_pojo.url, reserve_pojo.phone)
-            collection.document(reserve_pojo.id).update({u'accepted': result.name})
-    print(count)
+    task_list = list()
+    with ThreadPoolExecutor(max_workers=6) as executor:
+        for appointment in collection.stream():
+            worker = Worker()
+            count = count + 1
+            reserve_pojo = ReserveResultPojo.from_firestore_dict(appointment.to_dict())
+            # if reserve_pojo.accepted:
+                # print("status is " + reserve_pojo.accepted)
+            # if reserve_pojo.accepted is None:
+                # or ResultEnum.PENDING.value == reserve_pojo.accepted:
+                # tasks = check_result_page(reserve_pojo.url, reserve_pojo.phone)
+            executor.submit(worker.run, reserve_pojo, collection)
+    print(count)