🎨 OCR 未启用时不保存 ocr-texts.json https://github.com/siyuan-note/siyuan/issues/11171

This commit is contained in:
Daniel 2024-04-29 16:10:56 +08:00
parent 1e53010b4d
commit af694f6301
No known key found for this signature in database
GPG key ID: 86211BA83DF03017
5 changed files with 114 additions and 120 deletions

View file

@ -42,7 +42,7 @@ func main() {
model.BootSyncData()
model.InitBoxes()
model.LoadFlashcards()
model.LoadAssetsTexts()
util.LoadAssetsTexts()
util.SetBooted()
util.PushClearAllMsg()

View file

@ -621,7 +621,7 @@ func Close(force, setCurrentWorkspace bool, execInstallPkg int) (exitCode int) {
Conf.Close()
sql.CloseDatabase()
treenode.SaveBlockTree(false)
SaveAssetsTexts()
util.SaveAssetsTexts()
clearWorkspaceTemp()
clearCorruptedNotebooks()
clearPortJSON()

View file

@ -2,13 +2,9 @@ package model
import (
"path/filepath"
"runtime/debug"
"strings"
"time"
"github.com/88250/go-humanize"
"github.com/88250/gulu"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/cache"
"github.com/siyuan-note/siyuan/kernel/sql"
@ -40,19 +36,14 @@ func autoOCRAssets() {
text := util.Tesseract(assetAbsPath)
p := strings.TrimPrefix(assetAbsPath, assetsPath)
p = "assets" + filepath.ToSlash(p)
util.AssetsTextsLock.Lock()
util.AssetsTexts[p] = text
util.AssetsTextsLock.Unlock()
if "" != text {
util.AssetsTextsChanged.Store(true)
}
util.SetAssetText(p, text)
if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
break
}
}
}
cleanNotExistAssetsTexts()
util.CleanNotExistAssetsTexts()
// 刷新 OCR 结果到数据库
util.NodeOCRQueueLock.Lock()
@ -63,27 +54,6 @@ func autoOCRAssets() {
util.NodeOCRQueue = nil
}
// cleanNotExistAssetsTexts removes every entry of util.AssetsTexts whose
// asset file is no longer present under the data assets directory, and flags
// the cache as changed for each entry it removes.
func cleanNotExistAssetsTexts() {
	util.AssetsTextsLock.Lock()
	defer util.AssetsTextsLock.Unlock()

	assetsDir := util.GetDataAssetsAbsPath()

	// Collect the stale keys first; they are deleted in a second pass.
	var stale []string
	for key := range util.AssetsTexts {
		// Keys carry an "assets" prefix; strip it to resolve the path on disk.
		relPath := strings.TrimPrefix(key, "assets")
		if filelock.IsExist(filepath.Join(assetsDir, relPath)) {
			continue
		}
		stale = append(stale, key)
	}

	for i := 0; i < len(stale); i++ {
		delete(util.AssetsTexts, stale[i])
		util.AssetsTextsChanged.Store(true)
	}
}
func getUnOCRAssetsAbsPaths() (ret []string) {
var assetsPaths []string
assets := cache.GetAssets()
@ -95,9 +65,8 @@ func getUnOCRAssetsAbsPaths() (ret []string) {
}
assetsPath := util.GetDataAssetsAbsPath()
assetsTextsTmp := util.AssetsTexts
for _, assetPath := range assetsPaths {
if _, ok := assetsTextsTmp[assetPath]; ok {
if util.ExistsAssetText(assetPath) {
continue
}
absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
@ -107,66 +76,5 @@ func getUnOCRAssetsAbsPaths() (ret []string) {
}
func FlushAssetsTextsJob() {
SaveAssetsTexts()
}
// LoadAssetsTexts reads ocr-texts.json from the data assets directory and
// unmarshals it into util.AssetsTexts. It does nothing when the file does
// not exist. If the file cannot be unmarshalled it is treated as corrupted
// and removed. A warning is logged when loading takes longer than 2 seconds.
func LoadAssetsTexts() {
	assetsPath := util.GetDataAssetsAbsPath()
	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
	if !filelock.IsExist(assetsTextsPath) {
		return
	}

	start := time.Now()
	data, err := filelock.ReadFile(assetsTextsPath)
	if nil != err {
		logging.LogErrorf("read assets texts failed: %s", err)
		return
	}

	// Hold the lock only around the unmarshal and release it before the
	// error check, so the failure path below cannot return with the mutex
	// still locked.
	util.AssetsTextsLock.Lock()
	err = gulu.JSON.UnmarshalJSON(data, &util.AssetsTexts)
	util.AssetsTextsLock.Unlock()
	if nil != err {
		logging.LogErrorf("unmarshal assets texts failed: %s", err)
		if err = filelock.Remove(assetsTextsPath); nil != err {
			logging.LogErrorf("removed corrupted assets texts failed: %s", err)
		}
		return
	}
	debug.FreeOSMemory()

	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
		logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
	}
}
func SaveAssetsTexts() {
if !util.AssetsTextsChanged.Load() {
return
}
start := time.Now()
util.AssetsTextsLock.Lock()
data, err := gulu.JSON.MarshalIndentJSON(util.AssetsTexts, "", " ")
if nil != err {
logging.LogErrorf("marshal assets texts failed: %s", err)
return
}
util.AssetsTextsLock.Unlock()
assetsPath := util.GetDataAssetsAbsPath()
assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
if err = filelock.WriteFile(assetsTextsPath, data); nil != err {
logging.LogErrorf("write assets texts failed: %s", err)
return
}
debug.FreeOSMemory()
if elapsed := time.Since(start).Seconds(); 2 < elapsed {
logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
}
util.AssetsTextsChanged.Store(false)
util.SaveAssetsTexts()
}

View file

@ -1451,7 +1451,7 @@ func processSyncMergeResult(exit, byHand bool, mergeResult *dejavu.MergeResult,
}
if needReloadOcrTexts {
LoadAssetsTexts()
util.LoadAssetsTexts()
}
if needReloadPlugin {

View file

@ -23,6 +23,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime/debug"
"strconv"
"strings"
"sync"
@ -33,39 +34,124 @@ import (
"github.com/88250/gulu"
"github.com/88250/lute/ast"
"github.com/88250/lute/html"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
)
var (
TesseractBin = "tesseract"
TesseractEnabled bool
TesseractMaxSize = 2 * 1000 * uint64(1000)
AssetsTexts = map[string]string{}
AssetsTextsLock = sync.Mutex{}
AssetsTextsChanged = atomic.Bool{}
TesseractBin = "tesseract"
TesseractEnabled bool
TesseractMaxSize = 2 * 1000 * uint64(1000)
TesseractLangs []string
TesseractLangs []string
assetsTexts = map[string]string{}
assetsTextsLock = sync.Mutex{}
assetsTextsChanged = atomic.Bool{}
)
// CleanNotExistAssetsTexts removes every entry of the OCR text cache whose
// asset file is no longer present under the data assets directory, and flags
// the cache as changed for each entry it removes.
func CleanNotExistAssetsTexts() {
	assetsTextsLock.Lock()
	defer assetsTextsLock.Unlock()

	assetsDir := GetDataAssetsAbsPath()

	// Collect the stale keys first; they are deleted in a second pass.
	var stale []string
	for key := range assetsTexts {
		// Keys carry an "assets" prefix; strip it to resolve the path on disk.
		relPath := strings.TrimPrefix(key, "assets")
		if filelock.IsExist(filepath.Join(assetsDir, relPath)) {
			continue
		}
		stale = append(stale, key)
	}

	for i := 0; i < len(stale); i++ {
		delete(assetsTexts, stale[i])
		assetsTextsChanged.Store(true)
	}
}
// LoadAssetsTexts reads ocr-texts.json from the data assets directory and
// unmarshals it into the in-memory OCR text cache. It does nothing when the
// file does not exist. If the file cannot be unmarshalled it is treated as
// corrupted and removed. A warning is logged when loading takes longer than
// 2 seconds.
func LoadAssetsTexts() {
	assetsPath := GetDataAssetsAbsPath()
	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
	if !filelock.IsExist(assetsTextsPath) {
		return
	}

	start := time.Now()
	data, err := filelock.ReadFile(assetsTextsPath)
	if nil != err {
		logging.LogErrorf("read assets texts failed: %s", err)
		return
	}

	// Hold the lock only around the unmarshal and release it before the
	// error check, so the failure path below cannot return with the mutex
	// still locked (which would block every later cache access).
	assetsTextsLock.Lock()
	err = gulu.JSON.UnmarshalJSON(data, &assetsTexts)
	assetsTextsLock.Unlock()
	if nil != err {
		logging.LogErrorf("unmarshal assets texts failed: %s", err)
		if err = filelock.Remove(assetsTextsPath); nil != err {
			logging.LogErrorf("removed corrupted assets texts failed: %s", err)
		}
		return
	}
	debug.FreeOSMemory()

	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
		logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
	}
}
// SaveAssetsTexts writes the in-memory OCR text cache to ocr-texts.json in
// the data assets directory. It is a no-op when the cache has not changed
// since the last save or when Tesseract (OCR) is not enabled. On a
// successful write the changed flag is cleared. A warning is logged when
// saving takes longer than 2 seconds.
func SaveAssetsTexts() {
	if !assetsTextsChanged.Load() || !TesseractEnabled {
		return
	}

	start := time.Now()

	// Hold the lock only around the marshal and release it before the error
	// check, so a marshal failure cannot return with the mutex still locked.
	assetsTextsLock.Lock()
	data, err := gulu.JSON.MarshalIndentJSON(assetsTexts, "", " ")
	assetsTextsLock.Unlock()
	if nil != err {
		logging.LogErrorf("marshal assets texts failed: %s", err)
		return
	}

	assetsPath := GetDataAssetsAbsPath()
	assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
	if err = filelock.WriteFile(assetsTextsPath, data); nil != err {
		logging.LogErrorf("write assets texts failed: %s", err)
		return
	}
	debug.FreeOSMemory()

	if elapsed := time.Since(start).Seconds(); 2 < elapsed {
		logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
	}

	assetsTextsChanged.Store(false)
}
func SetAssetText(asset, text string) {
AssetsTextsLock.Lock()
AssetsTexts[asset] = text
AssetsTextsLock.Unlock()
AssetsTextsChanged.Store(true)
assetsTextsLock.Lock()
assetsTexts[asset] = text
assetsTextsLock.Unlock()
if "" != text {
assetsTextsChanged.Store(true)
}
}
func ExistsAssetText(asset string) (ret bool) {
AssetsTextsLock.Lock()
_, ret = AssetsTexts[asset]
AssetsTextsLock.Unlock()
assetsTextsLock.Lock()
_, ret = assetsTexts[asset]
assetsTextsLock.Unlock()
return
}
func GetAssetText(asset string, force bool) (ret string) {
if !force {
AssetsTextsLock.Lock()
ret = AssetsTexts[asset]
AssetsTextsLock.Unlock()
assetsTextsLock.Lock()
ret = assetsTexts[asset]
assetsTextsLock.Unlock()
return
}
@ -73,11 +159,11 @@ func GetAssetText(asset string, force bool) (ret string) {
assetAbsPath := strings.TrimPrefix(asset, "assets")
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
ret = Tesseract(assetAbsPath)
AssetsTextsLock.Lock()
AssetsTexts[asset] = ret
AssetsTextsLock.Unlock()
assetsTextsLock.Lock()
assetsTexts[asset] = ret
assetsTextsLock.Unlock()
if "" != ret {
AssetsTextsChanged.Store(true)
assetsTextsChanged.Store(true)
}
return
}