Selaa lähdekoodia

Fixed tesseract issue

Matt 5 vuotta sitten
vanhempi
commit
5ce3cc17bb

+ 17 - 9
src/core/operations/OpticalCharacterRecognition.mjs

@@ -1,6 +1,7 @@
 /**
 /**
  * @author n1474335 [n1474335@gmail.com]
  * @author n1474335 [n1474335@gmail.com]
  * @author mshwed [m@ttshwed.com]
  * @author mshwed [m@ttshwed.com]
+ * @author Matt C [me@mitt.dev]
  * @copyright Crown Copyright 2019
  * @copyright Crown Copyright 2019
  * @license Apache-2.0
  * @license Apache-2.0
  */
  */
@@ -12,7 +13,7 @@ import { toBase64 } from "../lib/Base64.mjs";
 import { isWorkerEnvironment } from "../Utils.mjs";
 import { isWorkerEnvironment } from "../Utils.mjs";
 
 
 import Tesseract from "tesseract.js";
 import Tesseract from "tesseract.js";
-const { TesseractWorker } = Tesseract;
+const { createWorker } = Tesseract;
 
 
 import process from "process";
 import process from "process";
 
 
@@ -60,23 +61,30 @@ class OpticalCharacterRecognition extends Operation {
         const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`;
         const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`;
 
 
         try {
         try {
+            self.sendStatusMessage("Spinning up Tesseract worker...");
             const image = `data:${type};base64,${toBase64(input)}`;
             const image = `data:${type};base64,${toBase64(input)}`;
-            const worker = new TesseractWorker({
+            const worker = createWorker({
                 workerPath: `${assetDir}tesseract/worker.min.js`,
                 workerPath: `${assetDir}tesseract/worker.min.js`,
                 langPath: `${assetDir}tesseract/lang-data`,
                 langPath: `${assetDir}tesseract/lang-data`,
                 corePath: `${assetDir}tesseract/tesseract-core.wasm.js`,
                 corePath: `${assetDir}tesseract/tesseract-core.wasm.js`,
-            });
-            const result = await worker.recognize(image)
-                .progress(progress => {
+                logger: progress => {
                     if (isWorkerEnvironment()) {
                     if (isWorkerEnvironment()) {
-                        self.sendStatusMessage(`Status: ${progress.status} - ${(parseFloat(progress.progress)*100).toFixed(2)}%`);
+                        self.sendStatusMessage(`Status: ${progress.status}${progress.status === "recognizing text" ? ` - ${(parseFloat(progress.progress)*100).toFixed(2)}%`: "" }`);
                     }
                     }
-                });
+                }
+            });
+            await worker.load();
+            self.sendStatusMessage("Loading English language...");
+            await worker.loadLanguage("eng");
+            self.sendStatusMessage("Intialising Tesseract API...");
+            await worker.initialize("eng");
+            self.sendStatusMessage("Finding text...");
+            const result = await worker.recognize(image);
 
 
             if (showConfidence) {
             if (showConfidence) {
-                return `Confidence: ${result.confidence}%\n\n${result.text}`;
+                return `Confidence: ${result.data.confidence}%\n\n${result.data.text}`;
             } else {
             } else {
-                return result.text;
+                return result.data.text;
             }
             }
         } catch (err) {
         } catch (err) {
             throw new OperationError(`Error performing OCR on image. (${err})`);
             throw new OperationError(`Error performing OCR on image. (${err})`);

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 7
src/core/vendor/tesseract/tesseract-core.wasm.js


Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 0 - 0
src/core/vendor/tesseract/worker.min.js


+ 8 - 0
webpack.config.js

@@ -56,6 +56,14 @@ module.exports = {
                 context: "src/core/vendor/",
                 context: "src/core/vendor/",
                 from: "tesseract/**/*",
                 from: "tesseract/**/*",
                 to: "assets/"
                 to: "assets/"
+            }, {
+                context: "node_modules/tesseract.js/",
+                from: "dist/worker.min.js",
+                to: "assets/tesseract"
+            }, {
+                context: "node_modules/tesseract.js-core/",
+                from: "tesseract-core.wasm.js",
+                to: "assets/tesseract"
             }
             }
         ])
         ])
     ],
     ],

Kaikkia tiedostoja ei voida näyttää, sillä liian monta tiedostoa muuttui tässä diffissä