1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- package model
- import (
- "path/filepath"
- "strings"
- "time"
- "github.com/siyuan-note/logging"
- "github.com/siyuan-note/siyuan/kernel/cache"
- "github.com/siyuan-note/siyuan/kernel/sql"
- "github.com/siyuan-note/siyuan/kernel/task"
- "github.com/siyuan-note/siyuan/kernel/util"
- )
- func OCRAssetsJob() {
- util.WaitForTesseractInit()
- if !util.TesseractEnabled {
- return
- }
- task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets)
- }
- func autoOCRAssets() {
- if !util.TesseractEnabled {
- return
- }
- defer logging.Recover()
- assetsPath := util.GetDataAssetsAbsPath()
- assets := getUnOCRAssetsAbsPaths()
- if 0 < len(assets) {
- for i, assetAbsPath := range assets {
- text := util.GetOcrJsonText(util.Tesseract(assetAbsPath))
- p := strings.TrimPrefix(assetAbsPath, assetsPath)
- p = "assets" + filepath.ToSlash(p)
- util.SetAssetText(p, text)
- if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
- break
- }
- }
- }
- util.CleanNotExistAssetsTexts()
- // 刷新 OCR 结果到数据库
- util.NodeOCRQueueLock.Lock()
- defer util.NodeOCRQueueLock.Unlock()
- for _, id := range util.NodeOCRQueue {
- sql.IndexNodeQueue(id)
- }
- util.NodeOCRQueue = nil
- }
- func getUnOCRAssetsAbsPaths() (ret []string) {
- var assetsPaths []string
- assets := cache.GetAssets()
- for _, asset := range assets {
- if !util.IsTesseractExtractable(asset.Path) {
- continue
- }
- assetsPaths = append(assetsPaths, asset.Path)
- }
- assetsPath := util.GetDataAssetsAbsPath()
- for _, assetPath := range assetsPaths {
- if util.ExistsAssetText(assetPath) {
- continue
- }
- absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
- ret = append(ret, absPath)
- }
- return
- }
- func FlushAssetsTextsJob() {
- util.SaveAssetsTexts()
- }
|