ocr.go 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. package model
  2. import (
  3. "path/filepath"
  4. "strings"
  5. "time"
  6. "github.com/siyuan-note/logging"
  7. "github.com/siyuan-note/siyuan/kernel/cache"
  8. "github.com/siyuan-note/siyuan/kernel/sql"
  9. "github.com/siyuan-note/siyuan/kernel/task"
  10. "github.com/siyuan-note/siyuan/kernel/util"
  11. )
  12. func OCRAssetsJob() {
  13. util.WaitForTesseractInit()
  14. if !util.TesseractEnabled {
  15. return
  16. }
  17. task.AppendTaskWithTimeout(task.OCRImage, 30*time.Second, autoOCRAssets)
  18. }
  19. func autoOCRAssets() {
  20. if !util.TesseractEnabled {
  21. return
  22. }
  23. defer logging.Recover()
  24. assetsPath := util.GetDataAssetsAbsPath()
  25. assets := getUnOCRAssetsAbsPaths()
  26. if 0 < len(assets) {
  27. for i, assetAbsPath := range assets {
  28. text := util.GetOcrJsonText(util.Tesseract(assetAbsPath))
  29. p := strings.TrimPrefix(assetAbsPath, assetsPath)
  30. p = "assets" + filepath.ToSlash(p)
  31. util.SetAssetText(p, text)
  32. if 7 <= i { // 一次任务中最多处理 7 张图片,防止长时间占用系统资源
  33. break
  34. }
  35. }
  36. }
  37. util.CleanNotExistAssetsTexts()
  38. // 刷新 OCR 结果到数据库
  39. util.NodeOCRQueueLock.Lock()
  40. defer util.NodeOCRQueueLock.Unlock()
  41. for _, id := range util.NodeOCRQueue {
  42. sql.IndexNodeQueue(id)
  43. }
  44. util.NodeOCRQueue = nil
  45. }
  46. func getUnOCRAssetsAbsPaths() (ret []string) {
  47. var assetsPaths []string
  48. assets := cache.GetAssets()
  49. for _, asset := range assets {
  50. if !util.IsTesseractExtractable(asset.Path) {
  51. continue
  52. }
  53. assetsPaths = append(assetsPaths, asset.Path)
  54. }
  55. assetsPath := util.GetDataAssetsAbsPath()
  56. for _, assetPath := range assetsPaths {
  57. if util.ExistsAssetText(assetPath) {
  58. continue
  59. }
  60. absPath := filepath.Join(assetsPath, strings.TrimPrefix(assetPath, "assets"))
  61. ret = append(ret, absPath)
  62. }
  63. return
  64. }
  65. func FlushAssetsTextsJob() {
  66. util.SaveAssetsTexts()
  67. }