Use OCR to identify page content

This commit is contained in:
Lei PAN
2023-05-19 21:00:38 +02:00
parent 7717a04b43
commit 076a5195b7
4 changed files with 81 additions and 20 deletions
+2 -1
View File
@@ -20,7 +20,8 @@
"node-wget": "^0.4.3",
"node-xlsx": "^0.21.0",
"playwright": "^1.32.1",
"puppeteer": "^1.20.0",
"puppeteer": "^15.5.0",
"node-tesseract-ocr": "^2.2.1",
"ws": "^6.2.0",
"read-ini-file": "^3.0.1",
"uuid": "^9.0.0",
+31 -9
View File
@@ -11,6 +11,7 @@ const {
const GeoCaptchaSolver = require("./GeoCaptchaSolver");
const SlidingCaptchaSolver = require("./SlidingCaptchaSolver");
const {de} = require("yarn/lib/cli");
const OCRChecker = require("./OCRChecker");
// const RDV_URL = "http://192.168.0.44:8000/test_appointment.html"
const RDV_URL = "https://rendezvousparis.hermes.com/client/register";
const BLANK_URL = "about:blank"
@@ -321,8 +322,8 @@ class CommandorPage {
}
}
async clickValid(page) {
await this.saveCookies()
async clickValid() {
// await this.saveCookies()
await delay(getRandomWaitTime())
try {
if (!this.page.isClosed()) {
@@ -337,9 +338,15 @@ class CommandorPage {
// remove debug flag
// const validElement = await page.$('.btn');
console.log("will click on valid button")
console.log("will click on valid button")
console.log("will click on valid button")
this.page.evaluate(() => {
document.getElementsByClassName("btn")[0].click();
})
// this.browser.disconnect();
await delay(2000);
let ocrChecker = new OCRChecker(this.device, this.contact)
await ocrChecker.get_result()
} catch (e) {
log(e)
}
@@ -360,6 +367,7 @@ class CommandorPage {
let pageContent = await page.content()
let hasCaptcha = pageContent.includes("g-recaptcha-response")
if (hasCaptcha) {
// await this.browser.disconnect()
await this.clickCheckbox()
await delay(1000)
this.captchaSolver = new SolveCaptcha(page);
@@ -511,7 +519,7 @@ class CommandorPage {
} else {
log("audioBtn not found")
console.log("audioBtn not found")
console.log("we are blocked")
logWithDevice("we are blocked", this.device)
await this.resetBrowser()
}
}
@@ -533,7 +541,7 @@ class CommandorPage {
// log("onResponse with url:" + response.url())
// log("onResponse with url:" + response.body())
if (rex.test(response.url())) {
log("rdv url found:" + response.url())
logWithDevice("rdv url found:" + response.url(), this.device)
// save cookies
await this.saveCookies();
await this.push_message_to_db(PublishType.SUCCESS, response.url())
@@ -591,12 +599,26 @@ class CommandorPage {
}
async clickCheckbox() {
// try {
// // let errorItem = await this.page.click("#recaptcha-anchor > div.recaptcha-checkbox-border")
// await this.page.$('[title="reCAPTCHA"]').getByRole('checkbox', {name: 'I\'m not a robot'}).click();
// } catch (e) {
// log(e);
try {
// let errorItem = await this.page.click("#recaptcha-anchor > div.recaptcha-checkbox-border")
await this.page.waitForSelector("iframe")
let elementHandle = await this.page.$('[title="reCAPTCHA"]')
const iframe = await elementHandle.contentFrame()
await iframe.click("#recaptcha-anchor > div.recaptcha-checkbox-border")
// // .getByRole('checkbox', {name: 'I\'m not a robot'})
// if (enCheckbox) {
// enCheckbox.click()
// } else {
// let frCheckbox = await this.page.$('[title="reCAPTCHA"]').getByRole('checkbox', {name: 'Je ne suis pas un robot'})
// if (frCheckbox) {
// frCheckbox.click()
// } else {
// console.log("recaptcha checkbox not found")
// }
// }
} catch (e) {
log(e);
}
}
async handleError(errorContent) {
+46
View File
@@ -0,0 +1,46 @@
const {v4: uuidv4} = require("uuid");
const tesseract = require("node-tesseract-ocr");
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} delayInMs - How long to wait before the promise settles.
 * @returns {Promise<number>} Promise that resolves with the constant 2 once
 *     the timer fires.
 */
function delay(delayInMs) {
    // setTimeout forwards its extra argument to the callback, so the
    // promise is resolved with 2 when the timer elapses.
    const scheduleResolve = (done) => setTimeout(done, delayInMs, 2);
    return new Promise(scheduleResolve);
}
// Options passed to tesseract.recognize() for every screenshot.
// NOTE(review): meanings below follow the Tesseract CLI flags of the same
// name — confirm against the installed Tesseract version.
const config = {
    // Recognition language: English.
    lang: "eng",
    // OCR engine mode 1 (neural-net LSTM engine only, per Tesseract docs).
    oem: 1,
    // Page segmentation mode 3 (fully automatic, no OSD, per Tesseract docs).
    psm: 3,
};
/**
 * Takes a screenshot through the supplied device object and runs Tesseract
 * OCR over the saved image to read what is currently displayed.
 */
class OCRChecker {
    /**
     * @param {object} device - Object exposing an async
     *     `screenshot({path})` method that writes an image to `path`.
     * @param {object} contact - Object whose `passportNumber` is used as the
     *     prefix of the screenshot file name.
     */
    constructor(device, contact) {
        this.device = device;
        this.contact = contact;
    }

    /**
     * Build a unique PNG file name for the next screenshot.
     *
     * @returns {string} `<passportNumber>_<uuid v4>.png`
     */
    get_file_name() {
        return `${this.contact.passportNumber}_${uuidv4()}.png`;
    }

    /**
     * Capture a screenshot and run OCR on it.
     *
     * The recognised text is logged and also returned, so callers can
     * inspect the page content instead of only seeing it on the console.
     *
     * @returns {Promise<*>} Whatever `tesseract.recognize` resolves with
     *     (logged before being returned).
     */
    async get_result() {
        const fileName = await this.take_screen_shot();
        const result = await tesseract.recognize(fileName, config);
        console.log(result);
        return result;
    }

    /**
     * Save a screenshot of the device under a freshly generated file name.
     *
     * @returns {Promise<string>} The path the screenshot was written to.
     */
    async take_screen_shot() {
        const name = this.get_file_name();
        await this.device.screenshot({path: name});
        // NOTE(review): presumably gives the image time to be fully written
        // before OCR reads it — confirm whether this wait is still needed.
        await delay(1000);
        return name;
    }
}
module.exports = OCRChecker
+1 -9
View File
@@ -1,8 +1,6 @@
const fs = require("fs");
const {exec} = require("child_process");
const Position = require("../models/Position");
const axios = require("axios");
const {v4: uuidv4} = require('uuid');
@@ -20,12 +18,6 @@ function randomIntFromInterval(min, max) { // min and max included
return Math.floor(Math.random() * (max - min + 1) + min)
}
const positionInfo = new Map();
positionInfo.set("2109119DG", new Position(211, 1710))
positionInfo.set("ONEPLUS A6000", new Position(211, 1706))
positionInfo.set("SM-C5000", new Position(180, 1794))
class SlidingCaptchaSolver {
constructor(device) {
this.device = device;
@@ -54,7 +46,7 @@ class SlidingCaptchaSolver {
//get resolution of screen
await this.sendRequest(blockedImageFileName, async (detectedPositionList) => {
console.log("detectedPosition is " + detectedPositionList);
if (detectedPositionList.length === 2) {
if (detectedPositionList.length >= 2) {
// #xiaomi
let startPosition = detectedPositionList.filter((positionInfo) => {
return positionInfo.label === "origin"