🎨 OCR no longer blocks document loading https://github.com/siyuan-note/siyuan/issues/9230

This commit is contained in:
Daniel 2023-12-07 20:40:16 +08:00
parent b31765d0ab
commit 283917a9a8
No known key found for this signature in database
GPG key ID: 86211BA83DF03017
8 changed files with 123 additions and 11 deletions

View file

@ -51,6 +51,14 @@ func RemoveAsset(path string) {
delete(assetsCache, path)
}
// ExistAsset reports whether path has an entry in assetsCache.
// The lookup is performed while holding assetsLock.
func ExistAsset(path string) (ret bool) {
	assetsLock.Lock()
	_, ret = assetsCache[path]
	assetsLock.Unlock()
	return
}
func LoadAssets() {
defer logging.Recover()

View file

@ -41,8 +41,8 @@ func StartCron() {
go every(10*time.Minute, model.FixIndexJob)
go every(10*time.Minute, model.IndexEmbedBlockJob)
go every(10*time.Minute, model.CacheVirtualBlockRefJob)
go every(12*time.Second, model.OCRAssetsJob)
go every(12*time.Second, model.FlushAssetsTextsJob)
go every(30*time.Second, model.OCRAssetsJob)
go every(30*time.Second, model.FlushAssetsTextsJob)
go every(30*time.Second, model.HookDesktopUIProcJob)
}

View file

@ -21,7 +21,7 @@ func OCRAssetsJob() {
return
}
task.AppendTaskWithTimeout(task.OCRImage, 7*time.Second, autoOCRAssets)
task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets)
}
func autoOCRAssets() {
@ -40,7 +40,7 @@ func autoOCRAssets() {
if "" != text {
util.AssetsTextsChanged = true
}
if 4 <= i { // 一次任务中最多处理 4 张图片,防止卡顿
if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
break
}
}

View file

@ -20,6 +20,9 @@ import (
"database/sql"
"github.com/siyuan-note/siyuan/kernel/cache"
"github.com/siyuan-note/siyuan/kernel/filesys"
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
)
type Block struct {
@ -88,3 +91,41 @@ func updateBlockContent(tx *sql.Tx, block *Block) (err error) {
putBlockCache(block)
return
}
// indexNode recomputes the static content of the block identified by id and
// writes it to the content column of the blocks and blocks_fts tables, and of
// blocks_fts_case_insensitive when caseSensitive is false.
//
// If the block tree entry, the tree, or the node inside the tree cannot be
// found, nothing is written and a nil error is returned. When a statement
// fails, tx is rolled back and the error is returned.
func indexNode(tx *sql.Tx, id string) (err error) {
	blockTree := treenode.GetBlockTree(id)
	if nil == blockTree {
		return
	}

	// The load error is discarded; only a nil tree is treated as "not found".
	tree, _ := filesys.LoadTree(blockTree.BoxID, blockTree.Path, util.NewLute())
	if nil == tree {
		return
	}

	target := treenode.GetNodeInTree(tree, id)
	if nil == target {
		return
	}

	content := treenode.NodeStaticContent(target, nil, true, indexAssetPath)

	// These two tables are always updated, in this order.
	for _, stmt := range []string{
		"UPDATE blocks SET content = ? WHERE id = ?",
		"UPDATE blocks_fts SET content = ? WHERE id = ?",
	} {
		if err = execStmtTx(tx, stmt, content, id); nil != err {
			tx.Rollback()
			return
		}
	}

	if caseSensitive {
		return
	}

	// The case-insensitive table is only touched when caseSensitive is false.
	if err = execStmtTx(tx, "UPDATE blocks_fts_case_insensitive SET content = ? WHERE id = ?", content, id); nil != err {
		tx.Rollback()
	}
	return
}

View file

@ -798,9 +798,18 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes
length = utf8.RuneCountInString(fcontent)
} else if n.IsContainerBlock() {
markdown = treenode.ExportNodeStdMd(n, luteEngine)
if !treenode.IsNodeOCRed(n) {
IndexNodeQueue(n.ID)
}
content = treenode.NodeStaticContent(n, nil, true, indexAssetPath)
fc := treenode.FirstLeafBlock(n)
if !treenode.IsNodeOCRed(fc) {
IndexNodeQueue(fc.ID)
}
fcontent = treenode.NodeStaticContent(fc, nil, true, false)
parentID = n.Parent.ID
// 将标题块作为父节点
if h := heading(n); nil != h {
@ -809,7 +818,13 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes
length = utf8.RuneCountInString(fcontent)
} else {
markdown = treenode.ExportNodeStdMd(n, luteEngine)
if !treenode.IsNodeOCRed(n) {
IndexNodeQueue(n.ID)
}
content = treenode.NodeStaticContent(n, nil, true, indexAssetPath)
parentID = n.Parent.ID
// 将标题块作为父节点
if h := heading(n); nil != h {

View file

@ -51,6 +51,7 @@ type dbQueueOperation struct {
box string // delete_box/delete_box_refs/index
renameTree *parse.Tree // rename/rename_sub_tree
block *Block // update_block_content
id string // index_node
removeAssetHashes []string // delete_assets
}
@ -191,6 +192,8 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e
err = updateBlockContent(tx, op.block)
case "delete_assets":
err = deleteAssetsByHashes(tx, op.removeAssetHashes)
case "index_node":
err = indexNode(tx, op.id)
default:
msg := fmt.Sprintf("unknown operation [%s]", op.action)
logging.LogErrorf(msg)
@ -199,6 +202,20 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e
return
}
// IndexNodeQueue schedules an "index_node" operation for the block id.
//
// If operationQueue already holds an "index_node" operation for the same id,
// that slot is overwritten with the new operation (keeping its position);
// otherwise the new operation is appended. The queue is accessed while
// holding dbQueueLock.
func IndexNodeQueue(id string) {
	dbQueueLock.Lock()
	defer dbQueueLock.Unlock()

	queued := &dbQueueOperation{action: "index_node", id: id, inQueueTime: time.Now()}
	for idx := range operationQueue {
		pending := operationQueue[idx]
		if "index_node" != pending.action || id != pending.id {
			continue
		}
		operationQueue[idx] = queued
		return
	}
	operationQueue = append(operationQueue, queued)
}
func BatchRemoveAssetsQueue(hashes []string) {
if 1 > len(hashes) {
return

View file

@ -137,6 +137,32 @@ func ExportNodeStdMd(node *ast.Node, luteEngine *lute.Lute) string {
return markdown
}
// IsNodeOCRed reports whether every image under node that needs an asset text
// already has one.
//
// The subtree is walked and each image node's link destination is inspected:
//   - images without a link destination child are ignored;
//   - images whose destination is not reported by cache.ExistAsset are ignored;
//   - the first remaining image for which util.ExistsAssetText is false makes
//     the result false and stops the walk.
//
// A nil node contains no images and is reported as true, so callers do not
// have to guard against an empty lookup result before calling.
func IsNodeOCRed(node *ast.Node) (ret bool) {
	if nil == node {
		// Walking a nil node would dereference it in the callback below.
		return true
	}

	ret = true
	ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus {
		if !entering || ast.NodeImage != n.Type {
			return ast.WalkContinue
		}

		linkDest := n.ChildByType(ast.NodeLinkDest)
		if nil == linkDest {
			return ast.WalkContinue
		}

		dest := linkDest.TokensStr()
		if !cache.ExistAsset(dest) {
			// Not a known asset, so it is not counted against the result.
			return ast.WalkContinue
		}
		if !util.ExistsAssetText(dest) {
			ret = false
			return ast.WalkStop
		}
		return ast.WalkContinue
	})
	return
}
func NodeStaticContent(node *ast.Node, excludeTypes []string, includeTextMarkATitleURL, includeAssetPath bool) string {
if nil == node {
return ""

View file

@ -52,27 +52,32 @@ func SetAssetText(asset, text string) {
AssetsTextsChanged = true
}
func GetAssetText(asset string, force bool) string {
// ExistsAssetText reports whether AssetsTexts has an entry for asset.
// Only the presence of the key is checked, so an entry whose text is empty
// still counts. The lookup is performed while holding AssetsTextsLock.
func ExistsAssetText(asset string) (ret bool) {
	AssetsTextsLock.Lock()
	defer AssetsTextsLock.Unlock()

	_, ret = AssetsTexts[asset]
	return
}
func GetAssetText(asset string, force bool) (ret string) {
if !force {
AssetsTextsLock.Lock()
ret, ok := AssetsTexts[asset]
ret = AssetsTexts[asset]
AssetsTextsLock.Unlock()
if ok {
return ret
}
return
}
assetsPath := GetDataAssetsAbsPath()
assetAbsPath := strings.TrimPrefix(asset, "assets")
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
ret := Tesseract(assetAbsPath)
ret = Tesseract(assetAbsPath)
AssetsTextsLock.Lock()
AssetsTexts[asset] = ret
AssetsTextsLock.Unlock()
if "" != ret {
AssetsTextsChanged = true
}
return ret
return
}
func IsTesseractExtractable(p string) bool {