123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165 |
- package model
- import (
- "io"
- "os"
- "path/filepath"
- "runtime/debug"
- "strings"
- "time"
- "github.com/88250/gulu"
- "github.com/dustin/go-humanize"
- "github.com/siyuan-note/logging"
- "github.com/siyuan-note/siyuan/kernel/cache"
- "github.com/siyuan-note/siyuan/kernel/task"
- "github.com/siyuan-note/siyuan/kernel/util"
- )
- func OCRAssetsJob() {
- if !util.TesseractEnabled {
- return
- }
- task.AppendTaskWithTimeout(task.OCRImage, 7*time.Second, autoOCRAssets)
- }
- func autoOCRAssets() {
- defer logging.Recover()
- assetsPath := util.GetDataAssetsAbsPath()
- assets := getUnOCRAssetsAbsPaths()
- if 0 < len(assets) {
- for i, assetAbsPath := range assets {
- text := util.Tesseract(assetAbsPath)
- p := strings.TrimPrefix(assetAbsPath, assetsPath)
- p = "assets" + filepath.ToSlash(p)
- util.AssetsTextsLock.Lock()
- util.AssetsTexts[p] = text
- util.AssetsTextsLock.Unlock()
- util.AssetsTextsChanged = true
- if 4 <= i { // 一次任务中最多处理 4 张图片,防止卡顿
- break
- }
- }
- }
- cleanNotExistAssetsTexts()
- }
- func cleanNotExistAssetsTexts() {
- util.AssetsTextsLock.Lock()
- defer util.AssetsTextsLock.Unlock()
- assetsPath := util.GetDataAssetsAbsPath()
- var toRemoves []string
- for asset, _ := range util.AssetsTexts {
- assetAbsPath := strings.TrimPrefix(asset, "assets")
- assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
- if !gulu.File.IsExist(assetAbsPath) {
- toRemoves = append(toRemoves, asset)
- }
- }
- for _, asset := range toRemoves {
- delete(util.AssetsTexts, asset)
- util.AssetsTextsChanged = true
- }
- return
- }
- func getUnOCRAssetsAbsPaths() (ret []string) {
- var assetsPaths []string
- assets := cache.GetAssets()
- for _, asset := range assets {
- if !util.IsTesseractExtractable(asset.Path) {
- continue
- }
- assetsPaths = append(assetsPaths, asset.Path)
- }
- assetsPath := util.GetDataAssetsAbsPath()
- assetsTextsTmp := util.AssetsTexts
- for _, assetPath := range assetsPaths {
- if _, ok := assetsTextsTmp[assetPath]; ok {
- continue
- }
- absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
- ret = append(ret, absPath)
- }
- return
- }
- func FlushAssetsTextsJob() {
- SaveAssetsTexts()
- }
- func LoadAssetsTexts() {
- assetsPath := util.GetDataAssetsAbsPath()
- assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
- if !gulu.File.IsExist(assetsTextsPath) {
- return
- }
- start := time.Now()
- var err error
- fh, err := os.OpenFile(assetsTextsPath, os.O_RDWR, 0644)
- if nil != err {
- logging.LogErrorf("open assets texts failed: %s", err)
- return
- }
- defer fh.Close()
- data, err := io.ReadAll(fh)
- if nil != err {
- logging.LogErrorf("read assets texts failed: %s", err)
- return
- }
- util.AssetsTextsLock.Lock()
- if err = gulu.JSON.UnmarshalJSON(data, &util.AssetsTexts); nil != err {
- logging.LogErrorf("unmarshal assets texts failed: %s", err)
- if err = os.RemoveAll(assetsTextsPath); nil != err {
- logging.LogErrorf("removed corrupted assets texts failed: %s", err)
- }
- return
- }
- util.AssetsTextsLock.Unlock()
- debug.FreeOSMemory()
- if elapsed := time.Since(start).Seconds(); 2 < elapsed {
- logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
- }
- return
- }
- func SaveAssetsTexts() {
- if !util.AssetsTextsChanged {
- return
- }
- start := time.Now()
- util.AssetsTextsLock.Lock()
- data, err := gulu.JSON.MarshalIndentJSON(util.AssetsTexts, "", " ")
- if nil != err {
- logging.LogErrorf("marshal assets texts failed: %s", err)
- return
- }
- util.AssetsTextsLock.Unlock()
- assetsPath := util.GetDataAssetsAbsPath()
- assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
- if err = gulu.File.WriteFileSafer(assetsTextsPath, data, 0644); nil != err {
- logging.LogErrorf("write assets texts failed: %s", err)
- return
- }
- debug.FreeOSMemory()
- if elapsed := time.Since(start).Seconds(); 2 < elapsed {
- logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.Bytes(uint64(len(data))), assetsTextsPath, elapsed)
- }
- util.AssetsTextsChanged = false
- }
|