🎨 OCR no longer blocks document loading https://github.com/siyuan-note/siyuan/issues/9230
This commit is contained in:
parent
b31765d0ab
commit
283917a9a8
8 changed files with 123 additions and 11 deletions
8
kernel/cache/asset.go
vendored
8
kernel/cache/asset.go
vendored
|
@ -51,6 +51,14 @@ func RemoveAsset(path string) {
|
|||
delete(assetsCache, path)
|
||||
}
|
||||
|
||||
func ExistAsset(path string) (ret bool) {
|
||||
assetsLock.Lock()
|
||||
defer assetsLock.Unlock()
|
||||
|
||||
_, ret = assetsCache[path]
|
||||
return
|
||||
}
|
||||
|
||||
func LoadAssets() {
|
||||
defer logging.Recover()
|
||||
|
||||
|
|
|
@ -41,8 +41,8 @@ func StartCron() {
|
|||
go every(10*time.Minute, model.FixIndexJob)
|
||||
go every(10*time.Minute, model.IndexEmbedBlockJob)
|
||||
go every(10*time.Minute, model.CacheVirtualBlockRefJob)
|
||||
go every(12*time.Second, model.OCRAssetsJob)
|
||||
go every(12*time.Second, model.FlushAssetsTextsJob)
|
||||
go every(30*time.Second, model.OCRAssetsJob)
|
||||
go every(30*time.Second, model.FlushAssetsTextsJob)
|
||||
go every(30*time.Second, model.HookDesktopUIProcJob)
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ func OCRAssetsJob() {
|
|||
return
|
||||
}
|
||||
|
||||
task.AppendTaskWithTimeout(task.OCRImage, 7*time.Second, autoOCRAssets)
|
||||
task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets)
|
||||
}
|
||||
|
||||
func autoOCRAssets() {
|
||||
|
@ -40,7 +40,7 @@ func autoOCRAssets() {
|
|||
if "" != text {
|
||||
util.AssetsTextsChanged = true
|
||||
}
|
||||
if 4 <= i { // 一次任务中最多处理 4 张图片,防止卡顿
|
||||
if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
|
||||
break
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,9 @@ import (
|
|||
"database/sql"
|
||||
|
||||
"github.com/siyuan-note/siyuan/kernel/cache"
|
||||
"github.com/siyuan-note/siyuan/kernel/filesys"
|
||||
"github.com/siyuan-note/siyuan/kernel/treenode"
|
||||
"github.com/siyuan-note/siyuan/kernel/util"
|
||||
)
|
||||
|
||||
type Block struct {
|
||||
|
@ -88,3 +91,41 @@ func updateBlockContent(tx *sql.Tx, block *Block) (err error) {
|
|||
putBlockCache(block)
|
||||
return
|
||||
}
|
||||
|
||||
func indexNode(tx *sql.Tx, id string) (err error) {
|
||||
bt := treenode.GetBlockTree(id)
|
||||
if nil == bt {
|
||||
return
|
||||
}
|
||||
|
||||
luteEngine := util.NewLute()
|
||||
tree, _ := filesys.LoadTree(bt.BoxID, bt.Path, luteEngine)
|
||||
if nil == tree {
|
||||
return
|
||||
}
|
||||
|
||||
node := treenode.GetNodeInTree(tree, id)
|
||||
if nil == node {
|
||||
return
|
||||
}
|
||||
|
||||
content := treenode.NodeStaticContent(node, nil, true, indexAssetPath)
|
||||
stmt := "UPDATE blocks SET content = ? WHERE id = ?"
|
||||
if err = execStmtTx(tx, stmt, content, id); nil != err {
|
||||
tx.Rollback()
|
||||
return
|
||||
}
|
||||
stmt = "UPDATE blocks_fts SET content = ? WHERE id = ?"
|
||||
if err = execStmtTx(tx, stmt, content, id); nil != err {
|
||||
tx.Rollback()
|
||||
return
|
||||
}
|
||||
if !caseSensitive {
|
||||
stmt = "UPDATE blocks_fts_case_insensitive SET content = ? WHERE id = ?"
|
||||
if err = execStmtTx(tx, stmt, content, id); nil != err {
|
||||
tx.Rollback()
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -798,9 +798,18 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes
|
|||
length = utf8.RuneCountInString(fcontent)
|
||||
} else if n.IsContainerBlock() {
|
||||
markdown = treenode.ExportNodeStdMd(n, luteEngine)
|
||||
|
||||
if !treenode.IsNodeOCRed(n) {
|
||||
IndexNodeQueue(n.ID)
|
||||
}
|
||||
content = treenode.NodeStaticContent(n, nil, true, indexAssetPath)
|
||||
fc := treenode.FirstLeafBlock(n)
|
||||
|
||||
if !treenode.IsNodeOCRed(fc) {
|
||||
IndexNodeQueue(fc.ID)
|
||||
}
|
||||
fcontent = treenode.NodeStaticContent(fc, nil, true, false)
|
||||
|
||||
parentID = n.Parent.ID
|
||||
// 将标题块作为父节点
|
||||
if h := heading(n); nil != h {
|
||||
|
@ -809,7 +818,13 @@ func buildBlockFromNode(n *ast.Node, tree *parse.Tree) (block *Block, attributes
|
|||
length = utf8.RuneCountInString(fcontent)
|
||||
} else {
|
||||
markdown = treenode.ExportNodeStdMd(n, luteEngine)
|
||||
|
||||
if !treenode.IsNodeOCRed(n) {
|
||||
IndexNodeQueue(n.ID)
|
||||
}
|
||||
|
||||
content = treenode.NodeStaticContent(n, nil, true, indexAssetPath)
|
||||
|
||||
parentID = n.Parent.ID
|
||||
// 将标题块作为父节点
|
||||
if h := heading(n); nil != h {
|
||||
|
|
|
@ -51,6 +51,7 @@ type dbQueueOperation struct {
|
|||
box string // delete_box/delete_box_refs/index
|
||||
renameTree *parse.Tree // rename/rename_sub_tree
|
||||
block *Block // update_block_content
|
||||
id string // index_node
|
||||
removeAssetHashes []string // delete_assets
|
||||
}
|
||||
|
||||
|
@ -191,6 +192,8 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e
|
|||
err = updateBlockContent(tx, op.block)
|
||||
case "delete_assets":
|
||||
err = deleteAssetsByHashes(tx, op.removeAssetHashes)
|
||||
case "index_node":
|
||||
err = indexNode(tx, op.id)
|
||||
default:
|
||||
msg := fmt.Sprintf("unknown operation [%s]", op.action)
|
||||
logging.LogErrorf(msg)
|
||||
|
@ -199,6 +202,20 @@ func execOp(op *dbQueueOperation, tx *sql.Tx, context map[string]interface{}) (e
|
|||
return
|
||||
}
|
||||
|
||||
func IndexNodeQueue(id string) {
|
||||
dbQueueLock.Lock()
|
||||
defer dbQueueLock.Unlock()
|
||||
|
||||
newOp := &dbQueueOperation{id: id, inQueueTime: time.Now(), action: "index_node"}
|
||||
for i, op := range operationQueue {
|
||||
if "index_node" == op.action && op.id == id {
|
||||
operationQueue[i] = newOp
|
||||
return
|
||||
}
|
||||
}
|
||||
operationQueue = append(operationQueue, newOp)
|
||||
}
|
||||
|
||||
func BatchRemoveAssetsQueue(hashes []string) {
|
||||
if 1 > len(hashes) {
|
||||
return
|
||||
|
|
|
@ -137,6 +137,32 @@ func ExportNodeStdMd(node *ast.Node, luteEngine *lute.Lute) string {
|
|||
return markdown
|
||||
}
|
||||
|
||||
func IsNodeOCRed(node *ast.Node) (ret bool) {
|
||||
ret = true
|
||||
ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus {
|
||||
if !entering {
|
||||
return ast.WalkContinue
|
||||
}
|
||||
|
||||
if ast.NodeImage == n.Type {
|
||||
linkDest := n.ChildByType(ast.NodeLinkDest)
|
||||
if nil != linkDest {
|
||||
linkDestStr := linkDest.TokensStr()
|
||||
if !cache.ExistAsset(linkDestStr) {
|
||||
return ast.WalkContinue
|
||||
}
|
||||
|
||||
if !util.ExistsAssetText(linkDestStr) {
|
||||
ret = false
|
||||
return ast.WalkStop
|
||||
}
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func NodeStaticContent(node *ast.Node, excludeTypes []string, includeTextMarkATitleURL, includeAssetPath bool) string {
|
||||
if nil == node {
|
||||
return ""
|
||||
|
|
|
@ -52,27 +52,32 @@ func SetAssetText(asset, text string) {
|
|||
AssetsTextsChanged = true
|
||||
}
|
||||
|
||||
func GetAssetText(asset string, force bool) string {
|
||||
func ExistsAssetText(asset string) (ret bool) {
|
||||
AssetsTextsLock.Lock()
|
||||
_, ret = AssetsTexts[asset]
|
||||
AssetsTextsLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func GetAssetText(asset string, force bool) (ret string) {
|
||||
if !force {
|
||||
AssetsTextsLock.Lock()
|
||||
ret, ok := AssetsTexts[asset]
|
||||
ret = AssetsTexts[asset]
|
||||
AssetsTextsLock.Unlock()
|
||||
if ok {
|
||||
return ret
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
assetsPath := GetDataAssetsAbsPath()
|
||||
assetAbsPath := strings.TrimPrefix(asset, "assets")
|
||||
assetAbsPath = filepath.Join(assetsPath, assetAbsPath)
|
||||
ret := Tesseract(assetAbsPath)
|
||||
ret = Tesseract(assetAbsPath)
|
||||
AssetsTextsLock.Lock()
|
||||
AssetsTexts[asset] = ret
|
||||
AssetsTextsLock.Unlock()
|
||||
if "" != ret {
|
||||
AssetsTextsChanged = true
|
||||
}
|
||||
return ret
|
||||
return
|
||||
}
|
||||
|
||||
func IsTesseractExtractable(p string) bool {
|
||||
|
|
Loading…
Add table
Reference in a new issue