Need to use OpenCV to identify text
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const { cv, drawBlueRect } = require('opencv4nodejs/ut');
|
||||
// const { extractResults } = require('./dnn/ssdUtils');
|
||||
|
||||
if (!cv.xmodules.dnn) {
|
||||
throw new Error('exiting: opencv4nodejs compiled without dnn module');
|
||||
}
|
||||
|
||||
const modelPath = path.resolve(__dirname,
|
||||
'../data/text-models/frozen_east_text_detection.pb');
|
||||
const imgPath = path.resolve(__dirname, '../data/text-data/detection.png');
|
||||
|
||||
if (!fs.existsSync(modelPath)) {
|
||||
console.log('could not find EAST model');
|
||||
console.log('download the model from: https://github.com/oyyd/frozen_east_text_detection.pb/blob/71415464412c55bb1d135fcdeda498e29a67effa/frozen_east_text_detection.pb?raw=true'
|
||||
+ ' or create a .pb model from https://github.com/argman/EAST');
|
||||
throw new Error('exiting: could not find EAST model');
|
||||
}
|
||||
|
||||
const MIN_CONFIDENCE = 0.5;
|
||||
const NMS_THRESHOLD = 0.4;
|
||||
const SIZE = 320;
|
||||
|
||||
function decode(scores, geometry, confThreshold) {
|
||||
const [numRows, numCols] = scores.sizes.slice(2);
|
||||
const boxes = [];
|
||||
const confidences = [];
|
||||
|
||||
for (let y = 0; y < numRows; y += 1) {
|
||||
for (let x = 0; x < numCols; x += 1) {
|
||||
const score = scores.at([0, 0, y, x]);
|
||||
|
||||
if (score < MIN_CONFIDENCE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const offsetX = x * 4;
|
||||
const offsetY = y * 4;
|
||||
const angle = geometry.at([0, 4, y, x]);
|
||||
const cos = Math.cos(angle);
|
||||
const sin = Math.sin(angle);
|
||||
|
||||
const h = geometry.at([0, 0, y, x]) + geometry.at([0, 2, y, x]);
|
||||
const w = geometry.at([0, 1, y, x]) + geometry.at([0, 3, y, x]);
|
||||
|
||||
const endX = offsetX + (cos * geometry.at([0, 1, y, x])) + (sin * geometry.at([0, 2, y, x]));
|
||||
const endY = offsetY - (sin * geometry.at([0, 1, y, x])) + (cos * geometry.at([0, 2, y, x]));
|
||||
const startX = endX - w;
|
||||
const startY = endY - h;
|
||||
|
||||
boxes.push(new cv.Rect(
|
||||
startX,
|
||||
startY,
|
||||
endX - startX,
|
||||
endY - startY,
|
||||
));
|
||||
confidences.push(score);
|
||||
}
|
||||
}
|
||||
|
||||
return [boxes, confidences];
|
||||
}
|
||||
|
||||
function detection(modelAbsPath, imgAbsPath) {
|
||||
const net = cv.readNetFromTensorflow(modelPath);
|
||||
const img = cv.imread(imgAbsPath);
|
||||
const [imgHeight, imgWidth] = img.sizes;
|
||||
const widthRatio = imgWidth / SIZE;
|
||||
const heightRatio = imgHeight / SIZE;
|
||||
|
||||
const inputBlob = cv.blobFromImage(img, 1,
|
||||
new cv.Size(SIZE, SIZE), new cv.Vec3(123.68, 116.78, 103.94), true, false);
|
||||
|
||||
net.setInput(inputBlob);
|
||||
|
||||
const outBlobNames = [
|
||||
'feature_fusion/Conv_7/Sigmoid',
|
||||
'feature_fusion/concat_3',
|
||||
];
|
||||
|
||||
const [scores, geometry] = net.forward(outBlobNames);
|
||||
const [boxes, confidences] = decode(scores, geometry, MIN_CONFIDENCE);
|
||||
|
||||
const indices = cv.NMSBoxes(
|
||||
boxes,
|
||||
confidences, MIN_CONFIDENCE, NMS_THRESHOLD
|
||||
);
|
||||
|
||||
indices.forEach((i) => {
|
||||
const rect = boxes[i];
|
||||
const imgRect = new cv.Rect(
|
||||
rect.x * widthRatio,
|
||||
rect.y * heightRatio,
|
||||
rect.width * widthRatio,
|
||||
rect.height * heightRatio,
|
||||
)
|
||||
drawBlueRect(img, imgRect);
|
||||
});
|
||||
|
||||
cv.imshowWait('EAST text detection', img);
|
||||
}
|
||||
|
||||
detection(
|
||||
modelPath,
|
||||
imgPath
|
||||
);
|
||||
Reference in New Issue
Block a user