[tesseract] Remove dependency and related code

This commit is contained in:
Neeraj Gupta 2023-03-10 08:03:00 +05:30
parent 7284a6f0b4
commit 493888e917
No known key found for this signature in database
GPG key ID: 3C5A1684DC1729E1
11 changed files with 0 additions and 386 deletions

4
.gitmodules vendored
View file

@ -2,10 +2,6 @@
path = thirdparty/photoswipe
url = https://github.com/ente-io/PhotoSwipe.git
branch = master
[submodule "thirdparty/tesseract"]
path = thirdparty/tesseract
url = https://github.com/abhinavkgrd/tesseract.js.git
branch = worker-support
[submodule "ffmpeg-wasm"]
path = thirdparty/ffmpeg-wasm
url = https://github.com/abhinavkgrd/ffmpeg.wasm.git

View file

@ -71,7 +71,6 @@
"sanitize-filename": "^1.6.3",
"similarity-transformation": "^0.0.1",
"styled-components": "^5.3.5",
"tesseract.js": "file:./thirdparty/tesseract",
"transformation-matrix": "^2.10.0",
"workbox-precaching": "^6.1.5",
"workbox-recipes": "^6.1.5",

View file

@ -15,7 +15,6 @@ import { ibExtractFaceImageFromCrop } from 'utils/machineLearning/faceCrop';
import { FaceCropsRow, FaceImagesRow, ImageBitmapView } from './ImageViews';
import ssdMobileNetV2Service from 'services/machineLearning/ssdMobileNetV2Service';
import { DEFAULT_ML_SYNC_CONFIG } from 'constants/machineLearning/config';
// import tesseractService from 'services/machineLearning/tesseractService';
import imageSceneService from 'services/machineLearning/imageSceneService';
import { addLogLine } from 'utils/logging';
@ -110,13 +109,6 @@ export default function MLFileDebugView(props: MLFileDebugViewProps) {
);
addLogLine('detectedScenes: ', JSON.stringify(sceneDetections));
// const textDetections = await tesseractService.detectText(
// imageBitmap,
// DEFAULT_ML_SYNC_CONFIG.textDetection.minAccuracy,
// 0
// );
// addLogLine('detectedTexts: ', textDetections);
const mlSyncConfig = await getMLSyncConfig();
const faceCropPromises = faceDetections.map(async (faceDetection) =>
arcfaceCropService.getFaceCrop(

View file

@ -52,10 +52,6 @@ export const DEFAULT_ML_SYNC_CONFIG: MLSyncConfig = {
method: 'ImageScene',
minScore: 0.1,
},
textDetection: {
method: 'Tesseract',
minAccuracy: 75,
},
// tsne: {
// samples: 200,
// dim: 2,
@ -86,10 +82,6 @@ export const BLAZEFACE_PASS1_SCORE_THRESHOLD = 0.4;
export const BLAZEFACE_FACE_SIZE = 112;
export const MOBILEFACENET_FACE_SIZE = 112;
// Text detection is skipped when the longer side of the image is below this value.
export const TESSERACT_MIN_IMAGE_WIDTH = 44;
// Text detection is skipped when the shorter side of the image is below this value.
export const TESSERACT_MIN_IMAGE_HEIGHT = 20;
// When the shorter side of the image exceeds this value, the image is passed
// through resizeToSquare with this dimension before recognition.
export const TESSERACT_MAX_IMAGE_DIMENSION = 720;
// scene detection model takes fixed-shaped (224x224) inputs
// https://tfhub.dev/sayannath/lite-model/image-scene/1
export const SCENE_DETECTION_IMAGE_SIZE = 224;

View file

@ -17,8 +17,6 @@ import {
MLLibraryData,
ObjectDetectionService,
ObjectDetectionMethod,
TextDetectionMethod,
TextDetectionService,
SceneDetectionService,
SceneDetectionMethod,
} from 'types/machineLearning';
@ -31,7 +29,6 @@ import blazeFaceDetectionService from './blazeFaceDetectionService';
import mobileFaceNetEmbeddingService from './mobileFaceNetEmbeddingService';
import dbscanClusteringService from './dbscanClusteringService';
import ssdMobileNetV2Service from './ssdMobileNetV2Service';
import tesseractService from './tesseractService';
import imageSceneService from './imageSceneService';
import { getDedicatedCryptoWorker } from 'utils/comlink/ComlinkCryptoWorker';
import { ComlinkWorker } from 'utils/comlink/comlinkWorker';
@ -69,16 +66,6 @@ export class MLFactory {
throw Error('Unknown scene detection method: ' + method);
}
/**
 * Resolves the text detection service registered for the given method.
 *
 * @param method identifier of the requested text detection method
 * @returns the service that implements that method
 * @throws Error when the method does not match any known text detection method
 */
public static getTextDetectionService(
    method: TextDetectionMethod
): TextDetectionService {
    switch (method) {
        case 'Tesseract':
            return tesseractService;
        default:
            throw Error('Unknown text detection method: ' + method);
    }
}
public static getFaceCropService(method: FaceCropMethod) {
if (method === 'ArcFace') {
return arcfaceCropService;
@ -148,7 +135,6 @@ export class LocalMLSyncContext implements MLSyncContext {
public faceClusteringService: ClusteringService;
public objectDetectionService: ObjectDetectionService;
public sceneDetectionService: SceneDetectionService;
public textDetectionService: TextDetectionService;
public localFilesMap: Map<number, EnteFile>;
public outOfSyncFiles: EnteFile[];
@ -206,10 +192,6 @@ export class LocalMLSyncContext implements MLSyncContext {
this.config.sceneDetection.method
);
this.textDetectionService = MLFactory.getTextDetectionService(
this.config.textDetection.method
);
this.outOfSyncFiles = [];
this.nSyncedFiles = 0;
this.nSyncedFaces = 0;

View file

@ -1,184 +0,0 @@
import {
TextDetectionMethod,
TextDetectionService,
Versioned,
} from 'types/machineLearning';
import Tesseract, { createWorker } from 'tesseract.js';
import QueueProcessor from 'services/queueProcessor';
import { CustomError } from 'utils/error';
import { imageBitmapToBlob, resizeToSquare } from 'utils/image';
import { getFileType } from 'services/typeDetectionService';
import { FILE_TYPE } from 'constants/file';
import { makeID } from 'utils/user';
import {
TESSERACT_MAX_IMAGE_DIMENSION,
TESSERACT_MIN_IMAGE_HEIGHT,
TESSERACT_MIN_IMAGE_WIDTH,
TEXT_DETECTION_TIMEOUT_MS,
} from 'constants/machineLearning/config';
import { promiseWithTimeout } from 'utils/common/promiseTimeout';
import { addLogLine } from 'utils/logging';
const TESSERACT_MAX_CONCURRENT_PROCESSES = 4;
/**
 * Text detection service backed by a pool of tesseract.js workers.
 *
 * The pool is created lazily on the first detection request. Requests are
 * routed through a QueueProcessor constructed with the same
 * TESSERACT_MAX_CONCURRENT_PROCESSES value that sizes the pool.
 */
class TesseractService implements TextDetectionService {
    public method: Versioned<TextDetectionMethod>;
    // Pool initialisation promise. Unset until the first request and again
    // after dispose(), so that the pool can be rebuilt on demand.
    private ready: Promise<void> | undefined;
    private textDetector = new QueueProcessor<Tesseract.Word[] | Error>(
        TESSERACT_MAX_CONCURRENT_PROCESSES
    );
    private tesseractWorkerPool = new Array<Tesseract.Worker>(
        TESSERACT_MAX_CONCURRENT_PROCESSES
    );

    public constructor() {
        this.method = {
            value: 'Tesseract',
            version: 1,
        };
    }

    /**
     * Creates a worker, loads and initialises the 'eng' language, and sets a
     * character whitelist made of digits, ASCII letters and the space
     * character.
     *
     * @returns the fully initialised worker
     */
    private async createTesseractWorker() {
        const tesseractWorker = createWorker({
            workerBlobURL: false,
            workerPath: '/js/tesseract/worker.min.js',
            corePath: '/js/tesseract/tesseract-core.wasm.js',
        });
        await tesseractWorker.load();
        await tesseractWorker.loadLanguage('eng');
        await tesseractWorker.initialize('eng');
        await tesseractWorker.setParameters({
            tessedit_char_whitelist:
                '0123456789' +
                'abcdefghijklmnopqrstuvwxyz' +
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +
                ' ',
            preserve_interword_spaces: '1',
        });
        return tesseractWorker;
    }

    /** Fills the pool with TESSERACT_MAX_CONCURRENT_PROCESSES workers, one after another. */
    private async init() {
        for (let i = 0; i < TESSERACT_MAX_CONCURRENT_PROCESSES; i++) {
            this.tesseractWorkerPool[i] = await this.createTesseractWorker();
            addLogLine('loaded tesseract worker no', i);
        }
        addLogLine('loaded tesseract worker pool');
    }

    /**
     * Takes a worker out of the pool, initialising the pool on first use.
     *
     * If the pool has no worker left (for example because a replacement
     * worker could not be created earlier), a fresh worker is created instead
     * of handing `undefined` to the caller.
     */
    private async getTesseractWorker() {
        if (!this.ready && typeof this.tesseractWorkerPool[0] === 'undefined') {
            this.ready = this.init();
        }
        await this.ready;
        const pooledWorker = this.tesseractWorkerPool.shift();
        if (pooledWorker) {
            return pooledWorker;
        }
        return this.createTesseractWorker();
    }

    /** Puts a worker back at the end of the pool. */
    private releaseWorker(tesseractWorker: Tesseract.Worker) {
        this.tesseractWorkerPool.push(tesseractWorker);
    }

    /**
     * Queues a text detection request for the given image.
     *
     * @param imageBitmap image to run text detection on
     * @param minAccuracy minimum word confidence for a word to be returned
     * @param attemptNumber index into TEXT_DETECTION_TIMEOUT_MS selecting the
     *     timeout for this attempt
     * @returns the detected words, an Error value when the image is skipped,
     *     or null when the queued request was cancelled
     * @throws any other error raised while processing the request
     */
    async detectText(
        imageBitmap: ImageBitmap,
        minAccuracy: number,
        attemptNumber: number
    ): Promise<Tesseract.Word[] | Error> {
        const response = this.textDetector.queueUpRequest(() =>
            this.detectTextUsingModel(imageBitmap, minAccuracy, attemptNumber)
        );
        try {
            return await response.promise;
        } catch (e) {
            if (e.message === CustomError.REQUEST_CANCELLED) {
                // ignore
                return null;
            } else {
                throw e;
            }
        }
    }

    /**
     * Validates and, if needed, downsizes the image, then runs recognition on
     * a pooled worker and keeps the words whose confidence is at least
     * `minAccuracy`.
     *
     * Skipped images are reported by returning (not throwing) an Error.
     */
    private detectTextUsingModel = async (
        imageBitmap: ImageBitmap,
        minAccuracy: number,
        attemptNumber: number
    ) => {
        // "height" is the shorter side and "width" the longer side,
        // regardless of the image orientation.
        const imageHeight = Math.min(imageBitmap.width, imageBitmap.height);
        const imageWidth = Math.max(imageBitmap.width, imageBitmap.height);
        if (
            !(
                imageWidth >= TESSERACT_MIN_IMAGE_WIDTH &&
                imageHeight >= TESSERACT_MIN_IMAGE_HEIGHT
            )
        ) {
            const tooSmallMessage = `file too small for tesseract- (${imageWidth},${imageHeight}) skipping text detection...`;
            addLogLine(tooSmallMessage);
            return Error(tooSmallMessage);
        }
        if (imageHeight > TESSERACT_MAX_IMAGE_DIMENSION) {
            addLogLine(
                `original dimension (${imageBitmap.width}px,${imageBitmap.height}px)`
            );
            imageBitmap = resizeToSquare(
                imageBitmap,
                TESSERACT_MAX_IMAGE_DIMENSION
            ).image;
        }
        const file = new File(
            [await imageBitmapToBlob(imageBitmap)],
            'text-detection-dummy-image'
        );
        const fileTypeInfo = await getFileType(file);
        // NOTE(review): with `&&` the file is rejected only when it is neither
        // an image nor one of the listed exact types; confirm whether `||`
        // was intended.
        if (
            fileTypeInfo.fileType !== FILE_TYPE.IMAGE &&
            !['png', 'jpg', 'bmp', 'pbm'].includes(fileTypeInfo.exactType)
        ) {
            const unsupportedMessage = `unsupported file type- ${fileTypeInfo.exactType}, skipping text detection....`;
            addLogLine(unsupportedMessage);
            return Error(unsupportedMessage);
        }

        let tesseractWorker: Tesseract.Worker | undefined =
            await this.getTesseractWorker();
        const id = makeID(6);
        addLogLine(
            `detecting text (${imageBitmap.width}px,${imageBitmap.height}px) fileType=${fileTypeInfo.exactType}`
        );
        try {
            const startTime = Date.now();
            const detections = (await promiseWithTimeout(
                tesseractWorker.recognize(file),
                TEXT_DETECTION_TIMEOUT_MS[attemptNumber]
            )) as Tesseract.RecognizeResult;
            addLogLine('detecting text ' + id, Date.now() - startTime, 'ms');
            const filteredDetections = detections.data.words.filter(
                ({ confidence }) => confidence >= minAccuracy
            );
            return filteredDetections;
        } catch (e) {
            if (e.message === CustomError.WAIT_TIME_EXCEEDED) {
                // The timed-out worker is terminated and replaced. Forget it
                // before creating the replacement so that a failed creation
                // cannot put the terminated worker back into the pool.
                const timedOutWorker = tesseractWorker;
                tesseractWorker = undefined;
                timedOutWorker?.terminate();
                tesseractWorker = await this.createTesseractWorker();
            }
            throw e;
        } finally {
            if (tesseractWorker) {
                this.releaseWorker(tesseractWorker);
            }
        }
    };

    /** Intentionally empty. */
    public replaceWorkerWithNewOne() {}

    /**
     * Terminates every worker currently in the pool and resets the service
     * to its uninitialised state, so that a later detectText() call rebuilds
     * the pool instead of receiving an undefined worker.
     */
    public async dispose() {
        const pooledWorkers = this.tesseractWorkerPool;
        this.tesseractWorkerPool = new Array<Tesseract.Worker>(
            TESSERACT_MAX_CONCURRENT_PROCESSES
        );
        this.ready = undefined;
        for (const pooledWorker of pooledWorkers) {
            pooledWorker?.terminate();
        }
    }
}
export default new TesseractService();

View file

@ -1,106 +0,0 @@
import {
MLSyncContext,
MLSyncFileContext,
DetectedText,
WordGroup,
} from 'types/machineLearning';
import { addLogLine } from 'utils/logging';
import { isDifferentOrOld, getAllTextFromMap } from 'utils/machineLearning';
import mlIDbStorage from 'utils/storage/mlIDbStorage';
import ReaderService from './readerService';
/**
 * Keeps per-file text detections in sync and groups detected words across
 * files.
 */
class TextService {
    /**
     * Runs text detection for one file, or carries over the previous result
     * when the detection method and image source are unchanged and the last
     * run recorded no error.
     *
     * @param syncContext shared sync state providing the detection service and config
     * @param fileContext per-file state holding the old and new ML data
     * @param textDetectionTimeoutIndex attempt index used when the old ML
     *     data has no error count
     */
    async syncFileTextDetections(
        syncContext: MLSyncContext,
        fileContext: MLSyncFileContext,
        textDetectionTimeoutIndex?: number
    ) {
        const startedAt = Date.now();
        const { oldMlFile, newMlFile } = fileContext;
        const logElapsed = () =>
            addLogLine(
                `text detection time taken ${fileContext.enteFile.id}`,
                Date.now() - startedAt,
                'ms'
            );

        const canReusePrevious =
            !isDifferentOrOld(
                oldMlFile?.textDetectionMethod,
                syncContext.textDetectionService.method
            ) &&
            oldMlFile?.imageSource === syncContext.config.imageSource &&
            oldMlFile?.lastErrorMessage === null;
        if (canReusePrevious) {
            const { text, imageSource, imageDimensions, textDetectionMethod } =
                oldMlFile;
            newMlFile.text = text;
            newMlFile.imageSource = imageSource;
            newMlFile.imageDimensions = imageDimensions;
            newMlFile.textDetectionMethod = textDetectionMethod;
            return;
        }

        newMlFile.textDetectionMethod = syncContext.textDetectionService.method;
        fileContext.newDetection = true;
        const imageBitmap: ImageBitmap = await ReaderService.getImageBitmap(
            syncContext,
            fileContext
        );
        const attemptNumber =
            oldMlFile?.errorCount ?? textDetectionTimeoutIndex ?? 0;
        const textDetections =
            await syncContext.textDetectionService.detectText(
                imageBitmap,
                syncContext.config.textDetection.minAccuracy,
                attemptNumber
            );

        // The service reports skipped images by returning an Error value.
        if (textDetections instanceof Error) {
            logElapsed();
            newMlFile.errorCount = 2;
            newMlFile.lastErrorMessage = textDetections.message;
            return;
        }

        const detectedText: DetectedText[] = textDetections.map((word) => ({
            fileID: fileContext.enteFile.id,
            detection: {
                bbox: word.bbox,
                confidence: word.confidence,
                word: word.text.toLocaleLowerCase(),
            },
        }));
        newMlFile.text = detectedText;
        logElapsed();
        addLogLine(
            '[MLService] Detected text: ',
            fileContext.enteFile.metadata.title,
            newMlFile.text?.length
        );
    }

    /**
     * Returns the text map cached on the sync context, loading it from
     * storage the first time it is requested.
     */
    async getAllSyncedTextMap(syncContext: MLSyncContext) {
        if (!syncContext.allSyncedTextMap) {
            syncContext.allSyncedTextMap = await mlIDbStorage.getAllTextMap();
        }
        return syncContext.allSyncedTextMap;
    }

    /**
     * Groups the IDs of all files by detected word.
     *
     * @returns one group per distinct word, ordered by descending number of
     *     file entries
     */
    public async clusterWords(): Promise<WordGroup[]> {
        const storedTextMap = await mlIDbStorage.getAllTextMap();
        const filesByWord = new Map<string, number[]>();
        for (const entry of getAllTextFromMap(storedTextMap)) {
            const word = entry.detection.word;
            let fileIDs = filesByWord.get(word);
            if (!fileIDs) {
                fileIDs = [];
                filesByWord.set(word, fileIDs);
            }
            fileIDs.push(entry.fileID);
        }
        const groups = Array.from(filesByWord, ([word, files]) => ({
            word,
            files,
        }));
        groups.sort((a, b) => b.files.length - a.files.length);
        return groups;
    }
}
export default new TextService();

View file

@ -16,7 +16,6 @@ import {
SuggestionType,
} from 'types/search';
import ObjectService from './machineLearning/objectService';
import textService from './machineLearning/textService';
import { getFormattedDate, isInsideBox, isSameDayAnyYear } from 'utils/search';
import { Person, Thing } from 'types/machineLearning';
import { getUniqueFiles } from 'utils/file';
@ -47,7 +46,6 @@ export const getAutoCompleteSuggestions =
getFileNameSuggestion(searchPhrase, files),
getFileCaptionSuggestion(searchPhrase, files),
...(await getThingSuggestion(searchPhrase)),
...(await getWordSuggestion(searchPhrase)),
];
return convertSuggestionsToOptions(suggestions, files);
@ -243,19 +241,6 @@ async function getThingSuggestion(searchPhrase: string): Promise<Suggestion[]> {
);
}
/**
 * Builds TEXT suggestions from the word groups matching the search phrase.
 *
 * @param searchPhrase phrase to look up among the detected words
 * @returns one suggestion per matching word group, labelled with its word
 */
async function getWordSuggestion(searchPhrase: string): Promise<Suggestion[]> {
    const suggestions: Suggestion[] = [];
    for (const wordGroup of await searchText(searchPhrase)) {
        suggestions.push({
            type: SuggestionType.TEXT,
            value: wordGroup,
            label: wordGroup.word,
        } as Suggestion);
    }
    return suggestions;
}
function searchCollection(
searchPhrase: string,
collections: Collection[]
@ -328,13 +313,6 @@ async function searchThing(searchPhrase: string) {
);
}
/**
 * Finds word groups whose lower-cased word contains the search phrase.
 *
 * @param searchPhrase phrase to look for; it is used as given, without case conversion
 * @returns at most the first four matching word groups
 */
async function searchText(searchPhrase: string) {
    const wordGroups = await textService.clusterWords();
    const matchingGroups = wordGroups.filter((group) => {
        const normalisedWord = group.word.toLocaleLowerCase();
        return normalisedWord.includes(searchPhrase);
    });
    return matchingGroups.slice(0, 4);
}
function isSearchedFile(user: User, file: EnteFile, search: Search) {
if (search?.collection) {
return search.collection === file.collectionID;

View file

@ -14,7 +14,6 @@ import { EnteFile } from 'types/file';
import { Config } from 'types/common/config';
import { Dimensions } from 'types/image';
import { Box, Point } from '../../../thirdparty/face-api/classes';
import Tesseract from 'tesseract.js';
export interface MLSyncResult {
nOutOfSyncFiles: number;
@ -97,8 +96,6 @@ export declare type ObjectDetectionMethod = 'SSDMobileNetV2';
export declare type SceneDetectionMethod = 'ImageScene';
/** Identifier of a text detection method; 'Tesseract' is the only member. */
export declare type TextDetectionMethod = 'Tesseract';
export declare type FaceCropMethod = 'ArcFace';
export declare type FaceAlignmentMethod =
@ -210,22 +207,10 @@ export interface WordGroup {
files: Array<number>;
}
/** A single word found by text detection. */
export interface TextDetection {
// Bounding box of the word, in the Tesseract bbox format.
bbox: Tesseract.Bbox;
// The detected word.
word: string;
// Confidence reported for the word.
confidence: number;
}
/** Associates a text detection with the file it was found in. */
export interface DetectedText {
// ID of the file the detection belongs to.
fileID: number;
// The detected word together with its bounding box and confidence.
detection: TextDetection;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
objects?: RealWorldObject[];
text?: DetectedText[];
imageSource?: ImageType;
imageDimensions?: Dimensions;
faceDetectionMethod?: Versioned<FaceDetectionMethod>;
@ -234,7 +219,6 @@ export interface MlFileData {
faceEmbeddingMethod?: Versioned<FaceEmbeddingMethod>;
objectDetectionMethod?: Versioned<ObjectDetectionMethod>;
sceneDetectionMethod?: Versioned<SceneDetectionMethod>;
textDetectionMethod?: Versioned<TextDetectionMethod>;
mlVersion: number;
errorCount: number;
lastErrorMessage?: string;
@ -256,11 +240,6 @@ export interface SceneDetectionConfig {
minScore: number;
}
/** Configuration for text detection. */
export interface TextDetectionConfig {
// Which text detection method to use.
method: TextDetectionMethod;
// Minimum accuracy passed to the text detection service.
minAccuracy: number;
}
export interface FaceCropConfig {
enabled: boolean;
method: FaceCropMethod;
@ -399,17 +378,6 @@ export interface SceneDetectionService {
): Promise<ObjectDetection[]>;
}
/** Contract implemented by text detection services. */
export interface TextDetectionService {
// Method identifier, wrapped in Versioned.
method: Versioned<TextDetectionMethod>;
// init(): Promise<void>;
// Detects words in the image. Resolves with the detected words, or with an
// Error value instead of a word list.
detectText(
imageBitmap: ImageBitmap,
minAccuracy: number,
attemptNumber: number
): Promise<Tesseract.Word[] | Error>;
// Releases the resources held by the service.
dispose(): Promise<void>;
}
export interface FaceCropService {
method: Versioned<FaceCropMethod>;

@ -1 +0,0 @@
Subproject commit 73b42f8cb0bc36aee08a837b4ea70c919edab4cc

View file

@ -4426,8 +4426,6 @@ tapable@^2.2.0:
resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0"
integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==
"tesseract.js@file:./thirdparty/tesseract":
version "0.0.0"
text-table@^0.2.0:
version "0.2.0"