From 076a5195b7ca8a73526a23930a0710c1202f5238 Mon Sep 17 00:00:00 2001 From: Lei PAN Date: Fri, 19 May 2023 21:00:38 +0200 Subject: [PATCH] use ocr to identify page content --- package.json | 3 +- src/workers/CommandorPage.js | 42 +++++++++++++++++++------- src/workers/OCRChecker.js | 46 +++++++++++++++++++++++++++++ src/workers/SlidingCaptchaSolver.js | 10 +------ 4 files changed, 81 insertions(+), 20 deletions(-) create mode 100644 src/workers/OCRChecker.js diff --git a/package.json b/package.json index 26b9143..7de3632 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,8 @@ "node-wget": "^0.4.3", "node-xlsx": "^0.21.0", "playwright": "^1.32.1", - "puppeteer": "^1.20.0", + "puppeteer": "^15.5.0", + "node-tesseract-ocr": "^2.2.1", "ws": "^6.2.0", "read-ini-file": "^3.0.1", "uuid": "^9.0.0", diff --git a/src/workers/CommandorPage.js b/src/workers/CommandorPage.js index f303f71..5efc32c 100644 --- a/src/workers/CommandorPage.js +++ b/src/workers/CommandorPage.js @@ -11,6 +11,7 @@ const { const GeoCaptchaSolver = require("./GeoCaptchaSolver"); const SlidingCaptchaSolver = require("./SlidingCaptchaSolver"); const {de} = require("yarn/lib/cli"); +const OCRChecker = require("./OCRChecker"); // const RDV_URL = "http://192.168.0.44:8000/test_appointment.html" const RDV_URL = "https://rendezvousparis.hermes.com/client/register"; const BLANK_URL = "about:blank" @@ -321,8 +322,8 @@ class CommandorPage { } } - async clickValid(page) { - await this.saveCookies() + async clickValid() { + // await this.saveCookies() await delay(getRandomWaitTime()) try { if (!this.page.isClosed()) { @@ -337,9 +338,15 @@ class CommandorPage { // remove debug flag // const validElement = await page.$('.btn'); console.log("will click on valid button") + console.log("will click on valid button") + console.log("will click on valid button") this.page.evaluate(() => { document.getElementsByClassName("btn")[0].click(); }) + // this.browser.disconnect(); + await delay(2000); + let ocrChecker = 
new OCRChecker(this.device, this.contact) + await ocrChecker.get_result() } catch (e) { log(e) } @@ -360,6 +367,7 @@ class CommandorPage { let pageContent = await page.content() let hasCaptcha = pageContent.includes("g-recaptcha-response") if (hasCaptcha) { + // await this.browser.disconnect() await this.clickCheckbox() await delay(1000) this.captchaSolver = new SolveCaptcha(page); @@ -511,7 +519,7 @@ class CommandorPage { } else { log("audioBtn not found") console.log("audioBtn not found") - console.log("we are blocked") + logWithDevice("we are blocked", this.device) await this.resetBrowser() } } @@ -533,7 +541,7 @@ class CommandorPage { // log("onResponse with url:" + response.url()) // log("onResponse with url:" + response.body()) if (rex.test(response.url())) { - log("rdv url found:" + response.url()) + logWithDevice("rdv url found:" + response.url(), this.device) // save cookies await this.saveCookies(); await this.push_message_to_db(PublishType.SUCCESS, response.url()) @@ -591,12 +599,26 @@ class CommandorPage { } async clickCheckbox() { - // try { - // // let errorItem = await this.page.click("#recaptcha-anchor > div.recaptcha-checkbox-border") - // await this.page.$('[title="reCAPTCHA"]').getByRole('checkbox', {name: 'I\'m not a robot'}).click(); - // } catch (e) { - // log(e); - // } + try { + // let errorItem = await this.page.click("#recaptcha-anchor > div.recaptcha-checkbox-border") + await this.page.waitForSelector("iframe") + let elementHandle = await this.page.$('[title="reCAPTCHA"]') + const iframe = await elementHandle.contentFrame() + await iframe.click("#recaptcha-anchor > div.recaptcha-checkbox-border") + // // .getByRole('checkbox', {name: 'I\'m not a robot'}) + // if (enCheckbox) { + // enCheckbox.click() + // } else { + // let frCheckbox = await this.page.$('[title="reCAPTCHA"]').getByRole('checkbox', {name: 'Je ne suis pas un robot'}) + // if (frCheckbox) { + // frCheckbox.click() + // } else { + // console.log("recaptcha checkbox not 
found") + // } + // } + } catch (e) { + log(e); + } } async handleError(errorContent) { diff --git a/src/workers/OCRChecker.js b/src/workers/OCRChecker.js new file mode 100644 index 0000000..bc82cc3 --- /dev/null +++ b/src/workers/OCRChecker.js @@ -0,0 +1,46 @@ +const {v4: uuidv4} = require("uuid"); +const tesseract = require("node-tesseract-ocr"); + +function delay(delayInMs) { + return new Promise(resolve => { + setTimeout(() => { + resolve(2); + }, delayInMs); + }); +} + +const config = { + lang: "eng", + oem: 1, + psm: 3, +} + + +class OCRChecker { + + constructor(device, contact) { + this.device = device; + this.contact = contact; + } + + get_file_name() { + let uuid = uuidv4(); + return this.contact.passportNumber + "_" + uuid + ".png" + } + + async get_result() { + let fileName = await this.take_screen_shot() + let result = await tesseract + .recognize(fileName, config) + console.log(result) + } + + async take_screen_shot() { + let name = this.get_file_name() + await this.device.screenshot({path: name}); + await delay(1000); + return name + // console.log(`stdout: ${stdout1}`); + } +} +module.exports = OCRChecker diff --git a/src/workers/SlidingCaptchaSolver.js b/src/workers/SlidingCaptchaSolver.js index 4c4251f..4b1a126 100644 --- a/src/workers/SlidingCaptchaSolver.js +++ b/src/workers/SlidingCaptchaSolver.js @@ -1,8 +1,6 @@ const fs = require("fs"); const {exec} = require("child_process"); -const Position = require("../models/Position"); - const axios = require("axios"); const {v4: uuidv4} = require('uuid'); @@ -20,12 +18,6 @@ function randomIntFromInterval(min, max) { // min and max included return Math.floor(Math.random() * (max - min + 1) + min) } -const positionInfo = new Map(); - -positionInfo.set("2109119DG", new Position(211, 1710)) -positionInfo.set("ONEPLUS A6000", new Position(211, 1706)) -positionInfo.set("SM-C5000", new Position(180, 1794)) - class SlidingCaptchaSolver { constructor(device) { this.device = device; @@ -54,7 +46,7 @@ class 
SlidingCaptchaSolver { //get resolution of screen await this.sendRequest(blockedImageFileName, async (detectedPositionList) => { console.log("detectedPosition is " + detectedPositionList); - if (detectedPositionList.length === 2) { + if (detectedPositionList.length >= 2) { // #xiaomi let startPosition = detectedPositionList.filter((positionInfo) => { return positionInfo.label === "origin"