🎨 OCR 未启用时不保存 ocr-texts.json https://github.com/siyuan-note/siyuan/issues/11171
This commit is contained in:
parent
1e53010b4d
commit
af694f6301
5 changed files with 114 additions and 120 deletions
|
@ -42,7 +42,7 @@ func main() {
|
|||
model.BootSyncData()
|
||||
model.InitBoxes()
|
||||
model.LoadFlashcards()
|
||||
model.LoadAssetsTexts()
|
||||
util.LoadAssetsTexts()
|
||||
|
||||
util.SetBooted()
|
||||
util.PushClearAllMsg()
|
||||
|
|
|
@ -621,7 +621,7 @@ func Close(force, setCurrentWorkspace bool, execInstallPkg int) (exitCode int) {
|
|||
Conf.Close()
|
||||
sql.CloseDatabase()
|
||||
treenode.SaveBlockTree(false)
|
||||
SaveAssetsTexts()
|
||||
util.SaveAssetsTexts()
|
||||
clearWorkspaceTemp()
|
||||
clearCorruptedNotebooks()
|
||||
clearPortJSON()
|
||||
|
|
|
@ -2,13 +2,9 @@ package model
|
|||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/88250/go-humanize"
|
||||
"github.com/88250/gulu"
|
||||
"github.com/siyuan-note/filelock"
|
||||
"github.com/siyuan-note/logging"
|
||||
"github.com/siyuan-note/siyuan/kernel/cache"
|
||||
"github.com/siyuan-note/siyuan/kernel/sql"
|
||||
|
@ -40,19 +36,14 @@ func autoOCRAssets() {
|
|||
text := util.Tesseract(assetAbsPath)
|
||||
p := strings.TrimPrefix(assetAbsPath, assetsPath)
|
||||
p = "assets" + filepath.ToSlash(p)
|
||||
util.AssetsTextsLock.Lock()
|
||||
util.AssetsTexts[p] = text
|
||||
util.AssetsTextsLock.Unlock()
|
||||
if "" != text {
|
||||
util.AssetsTextsChanged.Store(true)
|
||||
}
|
||||
util.SetAssetText(p, text)
|
||||
if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleanNotExistAssetsTexts()
|
||||
util.CleanNotExistAssetsTexts()
|
||||
|
||||
// 刷新 OCR 结果到数据库
|
||||
util.NodeOCRQueueLock.Lock()
|
||||
|
@ -63,27 +54,6 @@ func autoOCRAssets() {
|
|||
util.NodeOCRQueue = nil
|
||||
}
|
||||
|
||||
func cleanNotExistAssetsTexts() {
|
||||
util.AssetsTextsLock.Lock()
|
||||
defer util.AssetsTextsLock.Unlock()
|
||||
|
||||
assetsPath := util.GetDataAssetsAbsPath()
|
||||
var toRemoves []string
|
||||
for asset, _ := range util.AssetsTexts {
|
||||
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||
if !filelock.IsExist(assetAbsPath) {
|
||||
toRemoves = append(toRemoves, asset)
|
||||
}
|
||||
}
|
||||
|
||||
for _, asset := range toRemoves {
|
||||
delete(util.AssetsTexts, asset)
|
||||
util.AssetsTextsChanged.Store(true)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func getUnOCRAssetsAbsPaths() (ret []string) {
|
||||
var assetsPaths []string
|
||||
assets := cache.GetAssets()
|
||||
|
@ -95,9 +65,8 @@ func getUnOCRAssetsAbsPaths() (ret []string) {
|
|||
}
|
||||
|
||||
assetsPath := util.GetDataAssetsAbsPath()
|
||||
assetsTextsTmp := util.AssetsTexts
|
||||
for _, assetPath := range assetsPaths {
|
||||
if _, ok := assetsTextsTmp[assetPath]; ok {
|
||||
if util.ExistsAssetText(assetPath) {
|
||||
continue
|
||||
}
|
||||
absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
|
||||
|
@ -107,66 +76,5 @@ func getUnOCRAssetsAbsPaths() (ret []string) {
|
|||
}
|
||||
|
||||
func FlushAssetsTextsJob() {
|
||||
SaveAssetsTexts()
|
||||
}
|
||||
|
||||
func LoadAssetsTexts() {
|
||||
assetsPath := util.GetDataAssetsAbsPath()
|
||||
assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
|
||||
if !filelock.IsExist(assetsTextsPath) {
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
data, err := filelock.ReadFile(assetsTextsPath)
|
||||
if nil != err {
|
||||
logging.LogErrorf("read assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
util.AssetsTextsLock.Lock()
|
||||
if err = gulu.JSON.UnmarshalJSON(data, &util.AssetsTexts); nil != err {
|
||||
logging.LogErrorf("unmarshal assets texts failed: %s", err)
|
||||
if err = filelock.Remove(assetsTextsPath); nil != err {
|
||||
logging.LogErrorf("removed corrupted assets texts failed: %s", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
util.AssetsTextsLock.Unlock()
|
||||
debug.FreeOSMemory()
|
||||
|
||||
if elapsed := time.Since(start).Seconds(); 2 < elapsed {
|
||||
logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func SaveAssetsTexts() {
|
||||
if !util.AssetsTextsChanged.Load() {
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
|
||||
util.AssetsTextsLock.Lock()
|
||||
data, err := gulu.JSON.MarshalIndentJSON(util.AssetsTexts, "", " ")
|
||||
if nil != err {
|
||||
logging.LogErrorf("marshal assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
util.AssetsTextsLock.Unlock()
|
||||
|
||||
assetsPath := util.GetDataAssetsAbsPath()
|
||||
assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
|
||||
if err = filelock.WriteFile(assetsTextsPath, data); nil != err {
|
||||
logging.LogErrorf("write assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
debug.FreeOSMemory()
|
||||
|
||||
if elapsed := time.Since(start).Seconds(); 2 < elapsed {
|
||||
logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
|
||||
}
|
||||
|
||||
util.AssetsTextsChanged.Store(false)
|
||||
util.SaveAssetsTexts()
|
||||
}
|
||||
|
|
|
@ -1451,7 +1451,7 @@ func processSyncMergeResult(exit, byHand bool, mergeResult *dejavu.MergeResult,
|
|||
}
|
||||
|
||||
if needReloadOcrTexts {
|
||||
LoadAssetsTexts()
|
||||
util.LoadAssetsTexts()
|
||||
}
|
||||
|
||||
if needReloadPlugin {
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
@ -33,39 +34,124 @@ import (
|
|||
"github.com/88250/gulu"
|
||||
"github.com/88250/lute/ast"
|
||||
"github.com/88250/lute/html"
|
||||
"github.com/siyuan-note/filelock"
|
||||
"github.com/siyuan-note/logging"
|
||||
)
|
||||
|
||||
var (
|
||||
TesseractBin = "tesseract"
|
||||
TesseractEnabled bool
|
||||
TesseractMaxSize = 2 * 1000 * uint64(1000)
|
||||
AssetsTexts = map[string]string{}
|
||||
AssetsTextsLock = sync.Mutex{}
|
||||
AssetsTextsChanged = atomic.Bool{}
|
||||
TesseractBin = "tesseract"
|
||||
TesseractEnabled bool
|
||||
TesseractMaxSize = 2 * 1000 * uint64(1000)
|
||||
TesseractLangs []string
|
||||
|
||||
TesseractLangs []string
|
||||
assetsTexts = map[string]string{}
|
||||
assetsTextsLock = sync.Mutex{}
|
||||
assetsTextsChanged = atomic.Bool{}
|
||||
)
|
||||
|
||||
func CleanNotExistAssetsTexts() {
|
||||
assetsTextsLock.Lock()
|
||||
defer assetsTextsLock.Unlock()
|
||||
|
||||
assetsPath := GetDataAssetsAbsPath()
|
||||
var toRemoves []string
|
||||
for asset, _ := range assetsTexts {
|
||||
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||
if !filelock.IsExist(assetAbsPath) {
|
||||
toRemoves = append(toRemoves, asset)
|
||||
}
|
||||
}
|
||||
|
||||
for _, asset := range toRemoves {
|
||||
delete(assetsTexts, asset)
|
||||
assetsTextsChanged.Store(true)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func LoadAssetsTexts() {
|
||||
assetsPath := GetDataAssetsAbsPath()
|
||||
assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
|
||||
if !filelock.IsExist(assetsTextsPath) {
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
data, err := filelock.ReadFile(assetsTextsPath)
|
||||
if nil != err {
|
||||
logging.LogErrorf("read assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
assetsTextsLock.Lock()
|
||||
if err = gulu.JSON.UnmarshalJSON(data, &assetsTexts); nil != err {
|
||||
logging.LogErrorf("unmarshal assets texts failed: %s", err)
|
||||
if err = filelock.Remove(assetsTextsPath); nil != err {
|
||||
logging.LogErrorf("removed corrupted assets texts failed: %s", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
assetsTextsLock.Unlock()
|
||||
debug.FreeOSMemory()
|
||||
|
||||
if elapsed := time.Since(start).Seconds(); 2 < elapsed {
|
||||
logging.LogWarnf("read assets texts [%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func SaveAssetsTexts() {
|
||||
if !assetsTextsChanged.Load() || !TesseractEnabled {
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
|
||||
assetsTextsLock.Lock()
|
||||
data, err := gulu.JSON.MarshalIndentJSON(assetsTexts, "", " ")
|
||||
if nil != err {
|
||||
logging.LogErrorf("marshal assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
assetsTextsLock.Unlock()
|
||||
|
||||
assetsPath := GetDataAssetsAbsPath()
|
||||
assetsTextsPath := filepath.Join(assetsPath, "ocr-texts.json")
|
||||
if err = filelock.WriteFile(assetsTextsPath, data); nil != err {
|
||||
logging.LogErrorf("write assets texts failed: %s", err)
|
||||
return
|
||||
}
|
||||
debug.FreeOSMemory()
|
||||
|
||||
if elapsed := time.Since(start).Seconds(); 2 < elapsed {
|
||||
logging.LogWarnf("save assets texts [size=%s] to [%s], elapsed [%.2fs]", humanize.BytesCustomCeil(uint64(len(data)), 2), assetsTextsPath, elapsed)
|
||||
}
|
||||
|
||||
assetsTextsChanged.Store(false)
|
||||
}
|
||||
|
||||
func SetAssetText(asset, text string) {
|
||||
AssetsTextsLock.Lock()
|
||||
AssetsTexts[asset] = text
|
||||
AssetsTextsLock.Unlock()
|
||||
AssetsTextsChanged.Store(true)
|
||||
assetsTextsLock.Lock()
|
||||
assetsTexts[asset] = text
|
||||
assetsTextsLock.Unlock()
|
||||
if "" != text {
|
||||
assetsTextsChanged.Store(true)
|
||||
}
|
||||
}
|
||||
|
||||
func ExistsAssetText(asset string) (ret bool) {
|
||||
AssetsTextsLock.Lock()
|
||||
_, ret = AssetsTexts[asset]
|
||||
AssetsTextsLock.Unlock()
|
||||
assetsTextsLock.Lock()
|
||||
_, ret = assetsTexts[asset]
|
||||
assetsTextsLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func GetAssetText(asset string, force bool) (ret string) {
|
||||
if !force {
|
||||
AssetsTextsLock.Lock()
|
||||
ret = AssetsTexts[asset]
|
||||
AssetsTextsLock.Unlock()
|
||||
assetsTextsLock.Lock()
|
||||
ret = assetsTexts[asset]
|
||||
assetsTextsLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -73,11 +159,11 @@ func GetAssetText(asset string, force bool) (ret string) {
|
|||
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||
ret = Tesseract(assetAbsPath)
|
||||
AssetsTextsLock.Lock()
|
||||
AssetsTexts[asset] = ret
|
||||
AssetsTextsLock.Unlock()
|
||||
assetsTextsLock.Lock()
|
||||
assetsTexts[asset] = ret
|
||||
assetsTextsLock.Unlock()
|
||||
if "" != ret {
|
||||
AssetsTextsChanged.Store(true)
|
||||
assetsTextsChanged.Store(true)
|
||||
}
|
||||
return
|
||||
}
|
Loading…
Add table
Reference in a new issue