Преглед изворног кода

:art: 桌面端支持搜索图片 OCR 文本 https://github.com/siyuan-note/siyuan/issues/3470

Liang Ding пре 2 година
родитељ
комит
c4208596e4

+ 1 - 2
app/appearance/langs/en_US.json

@@ -1060,7 +1060,6 @@
     "182": "Sharing document, please wait...",
     "183": "Validating index document tree [%d/%d %s]",
     "184": "Powered by <a href=\"https://b3log.org/siyuan\" target=\"_blank\">SiYuan</a>",
-    "185": "Index verification complete",
-    "186": "Extracted text [%s] from asset [%s]"
+    "185": "Index verification complete"
   }
 }

+ 1 - 2
app/appearance/langs/es_ES.json

@@ -1060,7 +1060,6 @@
     "182": "Compartiendo documento, por favor espere...",
     "183": "Validando el árbol del documento de índice [%d/%d %s]",
     "184": "Con la tecnología de <a href=\"https://b3log.org/siyuan\" target=\"_blank\">SiYuan</a>",
-    "185": "Verificación de índice completada",
-    "186": "Texto extraído [%s] del recurso [%s]"
+    "185": "Verificación de índice completada"
   }
 }

+ 1 - 2
app/appearance/langs/fr_FR.json

@@ -1060,7 +1060,6 @@
     "182": "Partage du document, veuillez patienter...",
     "183": "Validation de l'arborescence du document d'index [%d/%d %s]",
     "184": "Propulsé par <a href=\"https://b3log.org/siyuan\" target=\"_blank\">SiYuan</a>",
-    "185": "Vérification de l'index terminée",
-    "186": "Texte extrait [%s] de l'actif [%s]"
+    "185": "Vérification de l'index terminée"
   }
 }

+ 3 - 4
app/appearance/langs/zh_CHT.json

@@ -31,8 +31,8 @@
   "leftRightLayout": "左右佈局",
   "topBottomLayout": "上下佈局",
   "keyword": "關鍵字",
-  "searchMethod":"搜索方式",
-  "regex":"正則表達式",
+  "searchMethod": "搜索方式",
+  "regex": "正則表達式",
   "keywordsLimit": "關鍵字數量限制",
   "exportAsImage": "導出為圖片",
   "exportBySiYuan": "由思源筆記導出",
@@ -1060,7 +1060,6 @@
     "182": "正在分享文檔,請稍等...",
     "183": "正在校驗索引文檔樹 [%d/%d %s]",
     "184": "由<a href=\"https://b3log.org/siyuan\" target=\"_blank\">思源筆記</a>強力驅動",
-    "185": "索引校驗完畢",
-    "186": "已提取資源文件 [%s] 圖片中的文本 [%s]"
+    "185": "索引校驗完畢"
   }
 }

+ 1 - 2
app/appearance/langs/zh_CN.json

@@ -1060,7 +1060,6 @@
     "182": "正在分享文档,请稍等...",
     "183": "正在校验索引文档树 [%d/%d %s]",
     "184": "由<a href=\"https://b3log.org/siyuan\" target=\"_blank\">思源笔记</a>强力驱动",
-    "185": "索引校验完毕",
-    "186": "已识别资源文件 [%s] 图片中的文本 [%s]"
+    "185": "索引校验完毕"
   }
 }

+ 3 - 3
kernel/main.go

@@ -40,7 +40,7 @@ func main() {
 	model.BootSyncData()
 	model.InitBoxes()
 	model.InitFlashcards()
-	util.LoadAssetsTexts()
+	model.LoadAssetsTexts()
 
 	go model.AutoGenerateDocHistory()
 	go model.AutoSync()
@@ -53,8 +53,8 @@ func main() {
 	go treenode.AutoFlushBlockTree()
 	go cache.LoadAssets()
 	go model.AutoFixIndex()
-	go util.AutoOCRAssets()
-	go util.AutoFlushAssetsTexts()
+	go model.AutoOCRAssets()
+	go model.AutoFlushAssetsTexts()
 	go model.HookDesktopUIProc()
 	model.WatchAssets()
 	model.HandleSignal()

+ 3 - 3
kernel/mobile/kernel.go

@@ -54,7 +54,7 @@ func StartKernel(container, appDir, workspaceBaseDir, timezoneID, localIPs, lang
 		model.BootSyncData()
 		model.InitBoxes()
 		model.InitFlashcards()
-		util.LoadAssetsTexts()
+		model.LoadAssetsTexts()
 
 		go model.AutoGenerateDocHistory()
 		go model.AutoSync()
@@ -67,8 +67,8 @@ func StartKernel(container, appDir, workspaceBaseDir, timezoneID, localIPs, lang
 		go treenode.AutoFlushBlockTree()
 		go cache.LoadAssets()
 		go model.AutoFixIndex()
-		go util.AutoOCRAssets()
-		go util.AutoFlushAssetsTexts()
+		go model.AutoOCRAssets()
+		go model.AutoFlushAssetsTexts()
 	}()
 }
 

+ 0 - 1
kernel/model/assets.go

@@ -197,7 +197,6 @@ func NetImg2LocalAssets(rootID string) (err error) {
 		if err = writeJSONQueue(tree); nil != err {
 			return
 		}
-		sql.WaitForWritingDatabase()
 		util.PushUpdateMsg(msgId, fmt.Sprintf(Conf.Language(120), files), 5000)
 	} else {
 		util.PushUpdateMsg(msgId, Conf.Language(121), 3000)

+ 1 - 1
kernel/model/conf.go

@@ -428,7 +428,7 @@ func Close(force bool, execInstallPkg int) (exitCode int) {
 	Conf.Close()
 	sql.CloseDatabase()
 	treenode.SaveBlockTree(false)
-	util.SaveAssetsTexts()
+	SaveAssetsTexts()
 	clearWorkspaceTemp()
 	clearPortJSON()
 	util.UnlockWorkspace()

+ 184 - 0
kernel/model/ocr.go

@@ -0,0 +1,184 @@
+package model
+
+import (
+	"github.com/dustin/go-humanize"
+	"io"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/debug"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/88250/gulu"
+	"github.com/panjf2000/ants/v2"
+	"github.com/siyuan-note/logging"
+	"github.com/siyuan-note/siyuan/kernel/cache"
+	"github.com/siyuan-note/siyuan/kernel/util"
+)
+
+// AutoOCRAssets repeatedly runs an OCR pass over the workspace assets, pausing
+// 7 seconds after each pass. It returns immediately when Tesseract is not
+// enabled; otherwise it never returns, so callers start it with `go`.
+func AutoOCRAssets() {
+	if util.TesseractEnabled {
+		// The post statement sleeps after every pass, including the first.
+		for ; ; time.Sleep(7 * time.Second) {
+			autoOCRAssets()
+		}
+	}
+}
+
+// autoOCRAssets performs a single OCR pass. Every image asset that has no
+// entry in util.AssetsTexts yet is recognized by a worker pool of at most 4
+// goroutines (fewer when the machine has fewer CPUs) and the text is stored
+// under its "assets/..." key. Afterwards entries whose files no longer exist
+// are dropped.
+//
+// logging.Recover is deferred first, presumably so that a panic in one pass
+// does not terminate the calling loop.
+func autoOCRAssets() {
+	defer logging.Recover()
+
+	assetsPath := util.GetDataAssetsAbsPath()
+	assets := getUnOCRAssetsAbsPaths()
+
+	poolSize := runtime.NumCPU()
+	if 4 < poolSize {
+		poolSize = 4
+	}
+	waitGroup := &sync.WaitGroup{}
+	pool, err := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
+		defer waitGroup.Done()
+
+		assetAbsPath := arg.(string)
+		text := util.Tesseract(assetAbsPath)
+		// Convert the absolute path back to the slash-separated key form.
+		key := "assets" + filepath.ToSlash(strings.TrimPrefix(assetAbsPath, assetsPath))
+		util.AssetsTextsLock.Lock()
+		util.AssetsTexts[key] = text
+		// The dirty flag is updated under the same lock as the map so both stay consistent.
+		util.AssetsTextsChanged = true
+		util.AssetsTextsLock.Unlock()
+	})
+	if nil != err {
+		logging.LogErrorf("create OCR pool failed: %s", err)
+		return
+	}
+
+	for _, assetAbsPath := range assets {
+		waitGroup.Add(1)
+		if err = pool.Invoke(assetAbsPath); nil != err {
+			// The task was rejected, so the worker will never call Done;
+			// balance the counter here to keep Wait from blocking forever.
+			waitGroup.Done()
+			logging.LogErrorf("submit OCR task [%s] failed: %s", assetAbsPath, err)
+		}
+	}
+	waitGroup.Wait()
+	pool.Release()
+
+	cleanNotFoundAssetsTexts()
+}
+
+// cleanNotFoundAssetsTexts removes entries from util.AssetsTexts whose asset
+// file no longer exists under the data assets directory, and marks the texts
+// as changed when anything was removed.
+func cleanNotFoundAssetsTexts() {
+	// Assigning a map to another variable only aliases it, so snapshot the
+	// keys under the lock instead of ranging over the shared map while other
+	// goroutines may be writing to it.
+	util.AssetsTextsLock.Lock()
+	assets := make([]string, 0, len(util.AssetsTexts))
+	for asset := range util.AssetsTexts {
+		assets = append(assets, asset)
+	}
+	util.AssetsTextsLock.Unlock()
+
+	// File existence checks are done outside the lock to keep it short.
+	assetsPath := util.GetDataAssetsAbsPath()
+	var toRemoves []string
+	for _, asset := range assets {
+		assetAbsPath := filepath.Join(assetsPath, strings.TrimPrefix(asset, "assets"))
+		if !gulu.File.IsExist(assetAbsPath) {
+			toRemoves = append(toRemoves, asset)
+		}
+	}
+	if 1 > len(toRemoves) {
+		return
+	}
+
+	util.AssetsTextsLock.Lock()
+	for _, asset := range toRemoves {
+		delete(util.AssetsTexts, asset)
+	}
+	util.AssetsTextsChanged = true
+	util.AssetsTextsLock.Unlock()
+}
+
+// getUnOCRAssetsAbsPaths returns the absolute paths of all cached assets with
+// a .png, .jpg or .jpeg extension (case-insensitive) that do not yet have an
+// entry in util.AssetsTexts.
+func getUnOCRAssetsAbsPaths() (ret []string) {
+	var assetsPaths []string
+	for _, asset := range cache.GetAssets() {
+		lowerName := strings.ToLower(asset.Path)
+		if !strings.HasSuffix(lowerName, ".png") && !strings.HasSuffix(lowerName, ".jpg") && !strings.HasSuffix(lowerName, ".jpeg") {
+			continue
+		}
+		assetsPaths = append(assetsPaths, asset.Path)
+	}
+
+	assetsPath := util.GetDataAssetsAbsPath()
+
+	// The map is shared with the OCR workers and GetAssetText, so it must be
+	// read under the lock; a plain assignment would only alias it.
+	util.AssetsTextsLock.Lock()
+	defer util.AssetsTextsLock.Unlock()
+	for _, assetPath := range assetsPaths {
+		if _, ok := util.AssetsTexts[assetPath]; ok {
+			continue
+		}
+		absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
+		ret = append(ret, absPath)
+	}
+	return
+}
+
+// AutoFlushAssetsTexts calls SaveAssetsTexts in an endless loop, sleeping
+// 7 seconds after every call. It never returns, so callers start it with `go`.
+func AutoFlushAssetsTexts() {
+	for ; ; time.Sleep(7 * time.Second) {
+		SaveAssetsTexts()
+	}
+}
+
+// LoadAssetsTexts loads the persisted OCR texts from "ocr-texts.json" in the
+// data assets directory into util.AssetsTexts. A missing file is not an error.
+// A file that cannot be unmarshalled is treated as corrupted and removed.
+// Failures are logged; nothing is returned to the caller.
+func LoadAssetsTexts() {
+	assetsPath := util.GetDataAssetsAbsPath()
+	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
+	if !gulu.File.IsExist(assetsTextsPath) {
+		return
+	}
+
+	start := time.Now()
+	// The file is only read here, so open it read-only.
+	fh, err := os.Open(assetsTextsPath)
+	if nil != err {
+		logging.LogErrorf("open assets texts failed: %s", err)
+		return
+	}
+
+	data, err := io.ReadAll(fh)
+	// Close right away: an open handle can prevent the removal below on some
+	// platforms.
+	fh.Close()
+	if nil != err {
+		logging.LogErrorf("read assets texts failed: %s", err)
+		return
+	}
+
+	// Release the lock before checking the error so that no return path can
+	// leave it held.
+	util.AssetsTextsLock.Lock()
+	err = gulu.JSON.UnmarshalJSON(data, &util.AssetsTexts)
+	util.AssetsTextsLock.Unlock()
+	if nil != err {
+		logging.LogErrorf("unmarshal assets texts failed: %s", err)
+		if err = os.RemoveAll(assetsTextsPath); nil != err {
+			logging.LogErrorf("removed corrupted assets texts failed: %s", err)
+		}
+		return
+	}
+	debug.FreeOSMemory()
+
+	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
+		logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
+	}
+}
+
+// SaveAssetsTexts writes util.AssetsTexts as indented JSON to
+// "ocr-texts.json" in the data assets directory when the texts have changed
+// since the last successful save. Failures are logged and leave the texts
+// marked as changed so that a later call retries.
+func SaveAssetsTexts() {
+	start := time.Now()
+
+	util.AssetsTextsLock.Lock()
+	if !util.AssetsTextsChanged {
+		util.AssetsTextsLock.Unlock()
+		return
+	}
+	data, err := gulu.JSON.MarshalIndentJSON(util.AssetsTexts, "", "  ")
+	if nil == err {
+		// Clear the flag together with taking the snapshot: a change made
+		// while the file is being written sets it again and is not lost.
+		util.AssetsTextsChanged = false
+	}
+	// Unlock before any return so a marshal failure cannot leave the lock held.
+	util.AssetsTextsLock.Unlock()
+	if nil != err {
+		logging.LogErrorf("marshal assets texts failed: %s", err)
+		return
+	}
+
+	assetsPath := util.GetDataAssetsAbsPath()
+	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
+	if err = gulu.File.WriteFileSafer(assetsTextsPath, data, 0644); nil != err {
+		logging.LogErrorf("write assets texts failed: %s", err)
+		// Nothing was persisted; mark the texts dirty again for the next attempt.
+		util.AssetsTextsLock.Lock()
+		util.AssetsTextsChanged = true
+		util.AssetsTextsLock.Unlock()
+		return
+	}
+	debug.FreeOSMemory()
+
+	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
+		logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
+	}
+}

+ 1 - 1
kernel/treenode/node.go

@@ -18,6 +18,7 @@ package treenode
 
 import (
 	"bytes"
+	util2 "github.com/siyuan-note/siyuan/kernel/util"
 	"strings"
 	"sync"
 
@@ -31,7 +32,6 @@ import (
 	"github.com/88250/lute/render"
 	"github.com/88250/lute/util"
 	"github.com/siyuan-note/logging"
-	util2 "github.com/siyuan-note/siyuan/kernel/util"
 )
 
 func GetBlockRef(n *ast.Node) (blockRefID, blockRefText, blockRefSubtype string) {

+ 0 - 338
kernel/util/ocr.go

@@ -1,338 +0,0 @@
-// SiYuan - Build Your Eternal Digital Garden
-// Copyright (c) 2020-present, b3log.org
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU Affero General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU Affero General Public License for more details.
-//
-// You should have received a copy of the GNU Affero General Public License
-// along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-package util
-
-import (
-	"bytes"
-	"context"
-	"io"
-	"os"
-	"os/exec"
-	"path/filepath"
-	"regexp"
-	"runtime"
-	"runtime/debug"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/88250/gulu"
-	"github.com/dustin/go-humanize"
-	"github.com/panjf2000/ants/v2"
-	"github.com/siyuan-note/logging"
-)
-
-var (
-	tesseractEnabled   bool
-	tesseractLangs     []string
-	assetsTexts        = map[string]string{}
-	assetsTextsLock    = sync.Mutex{}
-	assetsTextsChanged = false
-)
-
-func GetAssetText(asset string) string {
-	assetsTextsLock.Lock()
-	ret, ok := assetsTexts[asset]
-	assetsTextsLock.Unlock()
-	if ok {
-		return ret
-	}
-
-	assetsPath := GetDataAssetsAbsPath()
-	assetAbsPath := strings.TrimPrefix(asset, "assets")
-	assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
-	ret = Tesseract(assetAbsPath)
-	assetsTextsLock.Lock()
-	assetsTexts[asset] = ret
-	assetsTextsLock.Unlock()
-	return ret
-}
-
-func Tesseract(imgAbsPath string) string {
-	if ContainerStd != Container || !tesseractEnabled {
-		return ""
-	}
-
-	info, err := os.Stat(imgAbsPath)
-	if nil != err {
-		return ""
-	}
-
-	defer logging.Recover()
-
-	ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
-	defer cancel()
-
-	now := time.Now()
-	cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(tesseractLangs, "+"))
-	gulu.CmdAttr(cmd)
-	output, err := cmd.CombinedOutput()
-	if ctx.Err() == context.DeadlineExceeded {
-		logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
-		return ""
-	}
-
-	if nil != err {
-		logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
-		return ""
-	}
-
-	ret := string(output)
-	ret = strings.ReplaceAll(ret, "\r", "")
-	ret = strings.ReplaceAll(ret, "\n", "")
-	ret = strings.ReplaceAll(ret, "\t", " ")
-	reg := regexp.MustCompile("\\s{2,}")
-	ret = reg.ReplaceAllString(ret, " ")
-	logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
-	return ret
-}
-
-func AutoOCRAssets() {
-	if !tesseractEnabled {
-		return
-	}
-
-	for {
-		autoOCRAssets()
-		time.Sleep(7 * time.Second)
-	}
-}
-
-func autoOCRAssets() {
-	defer logging.Recover()
-
-	assetsPath := GetDataAssetsAbsPath()
-	assets := getUnOCRAssetsAbsPaths()
-
-	poolSize := runtime.NumCPU()
-	if 4 < poolSize {
-		poolSize = 4
-	}
-	waitGroup := &sync.WaitGroup{}
-	p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
-		defer waitGroup.Done()
-
-		assetAbsPath := arg.(string)
-		text := Tesseract(assetAbsPath)
-		p := strings.TrimPrefix(assetAbsPath, assetsPath)
-		p = "assets" + filepath.ToSlash(p)
-		assetsTextsLock.Lock()
-		assetsTexts[p] = text
-		assetsTextsLock.Unlock()
-		assetsTextsChanged = true
-	})
-	for _, assetAbsPath := range assets {
-		waitGroup.Add(1)
-		p.Invoke(assetAbsPath)
-	}
-	waitGroup.Wait()
-	p.Release()
-
-	cleanNotFoundAssetsTexts()
-}
-
-func cleanNotFoundAssetsTexts() {
-	tmp := assetsTexts
-
-	assetsPath := GetDataAssetsAbsPath()
-	var toRemoves []string
-	for asset, _ := range tmp {
-		assetAbsPath := strings.TrimPrefix(asset, "assets")
-		assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
-		if !gulu.File.IsExist(assetAbsPath) {
-			toRemoves = append(toRemoves, asset)
-		}
-	}
-
-	assetsTextsLock.Lock()
-	for _, asset := range toRemoves {
-		delete(assetsTexts, asset)
-		assetsTextsChanged = true
-	}
-	assetsTextsLock.Unlock()
-	return
-}
-
-func getUnOCRAssetsAbsPaths() (ret []string) {
-	assetsPath := GetDataAssetsAbsPath()
-	var assetsPaths []string
-	filepath.Walk(assetsPath, func(path string, info os.FileInfo, err error) error {
-		name := info.Name()
-		if info.IsDir() {
-			if strings.HasPrefix(name, ".") {
-				return filepath.SkipDir
-			}
-			return nil
-		}
-
-		lowerName := strings.ToLower(name)
-		if !strings.HasSuffix(lowerName, ".png") && !strings.HasSuffix(lowerName, ".jpg") && !strings.HasSuffix(lowerName, ".jpeg") {
-			return nil
-		}
-
-		assetsPaths = append(assetsPaths, path)
-		return nil
-	})
-
-	assetsTextsTmp := assetsTexts
-	for _, absPath := range assetsPaths {
-		p := strings.TrimPrefix(absPath, assetsPath)
-		p = "assets" + filepath.ToSlash(p)
-		if _, ok := assetsTextsTmp[p]; ok {
-			continue
-		}
-		ret = append(ret, absPath)
-	}
-	return
-}
-
-func AutoFlushAssetsTexts() {
-	for {
-		SaveAssetsTexts()
-		time.Sleep(7 * time.Second)
-	}
-}
-
-func LoadAssetsTexts() {
-	assetsPath := GetDataAssetsAbsPath()
-	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
-	if !gulu.File.IsExist(assetsTextsPath) {
-		return
-	}
-
-	start := time.Now()
-	var err error
-	fh, err := os.OpenFile(assetsTextsPath, os.O_RDWR, 0644)
-	if nil != err {
-		logging.LogErrorf("open assets texts failed: %s", err)
-		return
-	}
-	defer fh.Close()
-
-	data, err := io.ReadAll(fh)
-	if nil != err {
-		logging.LogErrorf("read assets texts failed: %s", err)
-		return
-	}
-
-	assetsTextsLock.Lock()
-	if err = gulu.JSON.UnmarshalJSON(data, &assetsTexts); nil != err {
-		logging.LogErrorf("unmarshal assets texts failed: %s", err)
-		if err = os.RemoveAll(assetsTextsPath); nil != err {
-			logging.LogErrorf("removed corrupted assets texts failed: %s", err)
-		}
-		return
-	}
-	assetsTextsLock.Unlock()
-	debug.FreeOSMemory()
-
-	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
-		logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
-	}
-	return
-}
-
-func SaveAssetsTexts() {
-	if !assetsTextsChanged {
-		return
-	}
-
-	start := time.Now()
-
-	assetsTextsLock.Lock()
-	data, err := gulu.JSON.MarshalIndentJSON(assetsTexts, "", "  ")
-	if nil != err {
-		logging.LogErrorf("marshal assets texts failed: %s", err)
-		return
-	}
-	assetsTextsLock.Unlock()
-
-	assetsPath := GetDataAssetsAbsPath()
-	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
-	if err = gulu.File.WriteFileSafer(assetsTextsPath, data, 0644); nil != err {
-		logging.LogErrorf("write assets texts failed: %s", err)
-		return
-	}
-	debug.FreeOSMemory()
-
-	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
-		logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
-	}
-
-	assetsTextsChanged = false
-}
-
-func initTesseract() {
-	ver := getTesseractVer()
-	if "" == ver {
-		return
-	}
-
-	tesseractLangs = getTesseractLangs()
-	if 1 > len(tesseractLangs) {
-		logging.LogWarnf("no tesseract langs found")
-		tesseractEnabled = false
-		return
-	}
-	logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", ver, strings.Join(tesseractLangs, "+"))
-}
-
-func getTesseractVer() (ret string) {
-	if ContainerStd != Container {
-		return
-	}
-
-	cmd := exec.Command("tesseract", "--version")
-	gulu.CmdAttr(cmd)
-	data, err := cmd.CombinedOutput()
-	if nil == err && strings.HasPrefix(string(data), "tesseract ") {
-		parts := bytes.Split(data, []byte("\n"))
-		if 0 < len(parts) {
-			ret = strings.TrimPrefix(string(parts[0]), "tesseract ")
-			ret = strings.TrimSpace(ret)
-			tesseractEnabled = true
-		}
-		return
-	}
-	return
-}
-
-func getTesseractLangs() (ret []string) {
-	if !tesseractEnabled {
-		return nil
-	}
-
-	cmd := exec.Command("tesseract", "--list-langs")
-	gulu.CmdAttr(cmd)
-	data, err := cmd.CombinedOutput()
-	if nil != err {
-		return nil
-	}
-
-	parts := bytes.Split(data, []byte("\n"))
-	if 0 < len(parts) {
-		parts = parts[1:]
-	}
-	for _, part := range parts {
-		part = bytes.TrimSpace(part)
-		if 0 == len(part) {
-			continue
-		}
-		ret = append(ret, string(part))
-	}
-	return
-}

+ 162 - 0
kernel/util/tesseract.go

@@ -0,0 +1,162 @@
+// SiYuan - Build Your Eternal Digital Garden
+// Copyright (c) 2020-present, b3log.org
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+package util
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/88250/gulu"
+	"github.com/siyuan-note/logging"
+)
+
+// Shared OCR state.
+//
+// TesseractEnabled is set to true by getTesseractVer when `tesseract --version`
+// succeeds and reset to false by initTesseract when no languages are found.
+// AssetsTexts maps an asset path ("assets/...") to its recognized text and is
+// guarded by AssetsTextsLock. AssetsTextsChanged marks AssetsTexts as modified
+// since it was last saved. tesseractLangs holds the languages reported by
+// `tesseract --list-langs`; they are joined with "+" for the -l option.
+var (
+	TesseractEnabled   bool
+	AssetsTexts        = map[string]string{}
+	AssetsTextsLock    = sync.Mutex{}
+	AssetsTextsChanged = false
+
+	tesseractLangs []string
+)
+
+// GetAssetText returns the OCR text of the given asset ("assets/..." path).
+// A cached entry in AssetsTexts is returned as is; otherwise the image is
+// recognized with Tesseract, the result is cached and returned. The result may
+// be empty, e.g. when Tesseract is disabled or recognition fails.
+func GetAssetText(asset string) string {
+	AssetsTextsLock.Lock()
+	ret, ok := AssetsTexts[asset]
+	AssetsTextsLock.Unlock()
+	if ok {
+		return ret
+	}
+
+	assetsPath := GetDataAssetsAbsPath()
+	assetAbsPath := strings.TrimPrefix(asset, "assets")
+	assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
+	// OCR runs outside the lock so other readers are not blocked meanwhile.
+	ret = Tesseract(assetAbsPath)
+	AssetsTextsLock.Lock()
+	AssetsTexts[asset] = ret
+	// Mark the texts as changed so the new entry is persisted on the next save.
+	AssetsTextsChanged = true
+	AssetsTextsLock.Unlock()
+	return ret
+}
+
+// tesseractSpacesRegexp matches runs of two or more whitespace characters. It
+// is compiled once here instead of on every Tesseract call.
+var tesseractSpacesRegexp = regexp.MustCompile("\\s{2,}")
+
+// Tesseract runs the `tesseract` command line tool on the image at imgAbsPath
+// and returns the recognized text flattened to a single line: CR and LF are
+// removed, tabs become spaces and whitespace runs collapse to one space.
+//
+// It returns "" when the container is not ContainerStd, when Tesseract is not
+// enabled, when the file cannot be stat'ed, when the command fails, or when it
+// does not finish within 7 seconds. On success the result is also pushed to
+// the status bar.
+func Tesseract(imgAbsPath string) string {
+	if ContainerStd != Container || !TesseractEnabled {
+		return ""
+	}
+
+	info, err := os.Stat(imgAbsPath)
+	if nil != err {
+		return ""
+	}
+
+	defer logging.Recover()
+
+	// Bound the external process so one slow image cannot stall the caller.
+	ctx, cancel := context.WithTimeout(context.Background(), 7*time.Second)
+	defer cancel()
+
+	now := time.Now()
+	cmd := exec.CommandContext(ctx, "tesseract", "-c", "debug_file=/dev/null", imgAbsPath, "stdout", "-l", strings.Join(tesseractLangs, "+"))
+	gulu.CmdAttr(cmd)
+	output, err := cmd.CombinedOutput()
+	if ctx.Err() == context.DeadlineExceeded {
+		logging.LogWarnf("tesseract [path=%s, size=%d] timeout", imgAbsPath, info.Size())
+		return ""
+	}
+
+	if nil != err {
+		logging.LogWarnf("tesseract [path=%s, size=%d] failed: %s", imgAbsPath, info.Size(), err)
+		return ""
+	}
+
+	ret := string(output)
+	ret = strings.ReplaceAll(ret, "\r", "")
+	ret = strings.ReplaceAll(ret, "\n", "")
+	ret = strings.ReplaceAll(ret, "\t", " ")
+	ret = tesseractSpacesRegexp.ReplaceAllString(ret, " ")
+	logging.LogInfof("tesseract [path=%s, size=%d, text=%s, elapsed=%dms]", imgAbsPath, info.Size(), ret, time.Since(now).Milliseconds())
+	msg := fmt.Sprintf("OCR [%s] [%s]", info.Name(), ret)
+	PushStatusBar(msg)
+	return ret
+}
+
+// initTesseract probes the local Tesseract installation. When a version is
+// detected it loads the available languages into tesseractLangs; if none are
+// found, OCR is disabled again via TesseractEnabled.
+func initTesseract() {
+	version := getTesseractVer()
+	if 0 == len(version) {
+		return
+	}
+
+	if tesseractLangs = getTesseractLangs(); 0 < len(tesseractLangs) {
+		logging.LogInfof("tesseract-ocr enabled [ver=%s, langs=%s]", version, strings.Join(tesseractLangs, "+"))
+		return
+	}
+
+	// getTesseractVer switched OCR on; without languages it cannot be used.
+	logging.LogWarnf("no tesseract langs found")
+	TesseractEnabled = false
+}
+
+// getTesseractVer runs `tesseract --version` and returns the version taken
+// from the first output line (the text after the "tesseract " prefix). As a
+// side effect it sets TesseractEnabled to true on success. It returns "" when
+// the container is not ContainerStd, when the command fails, or when the
+// output does not start with "tesseract ".
+func getTesseractVer() (ret string) {
+	if ContainerStd != Container {
+		return
+	}
+
+	verCmd := exec.Command("tesseract", "--version")
+	gulu.CmdAttr(verCmd)
+	out, err := verCmd.CombinedOutput()
+	if nil != err || !strings.HasPrefix(string(out), "tesseract ") {
+		return
+	}
+
+	// bytes.Split always yields at least one element for a non-empty separator.
+	firstLine := string(bytes.Split(out, []byte("\n"))[0])
+	ret = strings.TrimSpace(strings.TrimPrefix(firstLine, "tesseract "))
+	TesseractEnabled = true
+	return
+}
+
+// getTesseractLangs runs `tesseract --list-langs` and returns the listed
+// language codes. It returns nil when Tesseract is not enabled, when the
+// command fails, or when no language is listed.
+func getTesseractLangs() (ret []string) {
+	if !TesseractEnabled {
+		return nil
+	}
+
+	listCmd := exec.Command("tesseract", "--list-langs")
+	gulu.CmdAttr(listCmd)
+	out, err := listCmd.CombinedOutput()
+	if nil != err {
+		return nil
+	}
+
+	// The first output line is skipped (presumably a header line). Split
+	// always yields at least one element, so slicing from 1 is safe.
+	for _, line := range bytes.Split(out, []byte("\n"))[1:] {
+		if lang := bytes.TrimSpace(line); 0 < len(lang) {
+			ret = append(ret, string(lang))
+		}
+	}
+	return
+}