index.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package model
  17. import (
  18. "bytes"
  19. "fmt"
  20. "io/fs"
  21. "path/filepath"
  22. "runtime"
  23. "runtime/debug"
  24. "strings"
  25. "sync"
  26. "time"
  27. "github.com/88250/go-humanize"
  28. "github.com/88250/gulu"
  29. "github.com/88250/lute/ast"
  30. "github.com/88250/lute/editor"
  31. "github.com/88250/lute/html"
  32. "github.com/88250/lute/parse"
  33. "github.com/panjf2000/ants/v2"
  34. "github.com/siyuan-note/eventbus"
  35. "github.com/siyuan-note/filelock"
  36. "github.com/siyuan-note/logging"
  37. "github.com/siyuan-note/siyuan/kernel/av"
  38. "github.com/siyuan-note/siyuan/kernel/cache"
  39. "github.com/siyuan-note/siyuan/kernel/filesys"
  40. "github.com/siyuan-note/siyuan/kernel/sql"
  41. "github.com/siyuan-note/siyuan/kernel/task"
  42. "github.com/siyuan-note/siyuan/kernel/treenode"
  43. "github.com/siyuan-note/siyuan/kernel/util"
  44. )
  45. func UpsertIndexes(paths []string) {
  46. var syFiles []string
  47. for _, p := range paths {
  48. if strings.HasSuffix(p, "/") {
  49. syFiles = append(syFiles, listSyFiles(p)...)
  50. continue
  51. }
  52. if strings.HasSuffix(p, ".sy") {
  53. syFiles = append(syFiles, p)
  54. }
  55. }
  56. syFiles = gulu.Str.RemoveDuplicatedElem(syFiles)
  57. upsertIndexes(syFiles)
  58. }
  59. func RemoveIndexes(paths []string) {
  60. var syFiles []string
  61. for _, p := range paths {
  62. if strings.HasSuffix(p, "/") {
  63. syFiles = append(syFiles, listSyFiles(p)...)
  64. continue
  65. }
  66. if strings.HasSuffix(p, ".sy") {
  67. syFiles = append(syFiles, p)
  68. }
  69. }
  70. syFiles = gulu.Str.RemoveDuplicatedElem(syFiles)
  71. removeIndexes(syFiles)
  72. }
  73. func listSyFiles(dir string) (ret []string) {
  74. dirPath := filepath.Join(util.DataDir, dir)
  75. err := filelock.Walk(dirPath, func(path string, d fs.FileInfo, err error) error {
  76. if nil != err {
  77. logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err)
  78. return err
  79. }
  80. if d.IsDir() {
  81. return nil
  82. }
  83. if strings.HasSuffix(path, ".sy") {
  84. p := filepath.ToSlash(strings.TrimPrefix(path, util.DataDir))
  85. ret = append(ret, p)
  86. }
  87. return nil
  88. })
  89. if nil != err {
  90. logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err)
  91. }
  92. return
  93. }
  94. func (box *Box) Unindex() {
  95. task.AppendTask(task.DatabaseIndex, unindex, box.ID)
  96. go func() {
  97. sql.WaitForWritingDatabase()
  98. ResetVirtualBlockRefCache()
  99. }()
  100. }
  101. func unindex(boxID string) {
  102. ids := treenode.RemoveBlockTreesByBoxID(boxID)
  103. RemoveRecentDoc(ids)
  104. sql.DeleteBoxQueue(boxID)
  105. }
  106. func (box *Box) Index() {
  107. task.AppendTask(task.DatabaseIndexRef, removeBoxRefs, box.ID)
  108. task.AppendTask(task.DatabaseIndex, index, box.ID)
  109. task.AppendTask(task.DatabaseIndexRef, IndexRefs)
  110. go func() {
  111. sql.WaitForWritingDatabase()
  112. ResetVirtualBlockRefCache()
  113. }()
  114. }
  115. func removeBoxRefs(boxID string) {
  116. sql.DeleteBoxRefsQueue(boxID)
  117. }
  118. func index(boxID string) {
  119. box := Conf.Box(boxID)
  120. if nil == box {
  121. return
  122. }
  123. util.SetBootDetails("Listing files...")
  124. files := box.ListFiles("/")
  125. boxLen := len(Conf.GetOpenedBoxes())
  126. if 1 > boxLen {
  127. boxLen = 1
  128. }
  129. bootProgressPart := int32(30.0 / float64(boxLen) / float64(len(files)))
  130. start := time.Now()
  131. luteEngine := util.NewLute()
  132. var treeCount int
  133. var treeSize int64
  134. lock := sync.Mutex{}
  135. util.PushStatusBar(fmt.Sprintf("["+html.EscapeString(box.Name)+"] "+Conf.Language(64), len(files)))
  136. poolSize := runtime.NumCPU()
  137. if 4 < poolSize {
  138. poolSize = 4
  139. }
  140. waitGroup := &sync.WaitGroup{}
  141. var avNodes []*ast.Node
  142. p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
  143. defer waitGroup.Done()
  144. file := arg.(*FileInfo)
  145. lock.Lock()
  146. treeSize += file.size
  147. treeCount++
  148. i := treeCount
  149. lock.Unlock()
  150. tree, err := filesys.LoadTree(box.ID, file.path, luteEngine)
  151. if nil != err {
  152. logging.LogErrorf("read box [%s] tree [%s] failed: %s", box.ID, file.path, err)
  153. return
  154. }
  155. docIAL := parse.IAL2MapUnEsc(tree.Root.KramdownIAL)
  156. if "" == docIAL["updated"] { // 早期的数据可能没有 updated 属性,这里进行订正
  157. updated := util.TimeFromID(tree.Root.ID)
  158. tree.Root.SetIALAttr("updated", updated)
  159. docIAL["updated"] = updated
  160. if writeErr := filesys.WriteTree(tree); nil != writeErr {
  161. logging.LogErrorf("write tree [%s] failed: %s", tree.Path, writeErr)
  162. }
  163. }
  164. lock.Lock()
  165. avNodes = append(avNodes, tree.Root.ChildrenByType(ast.NodeAttributeView)...)
  166. lock.Unlock()
  167. cache.PutDocIAL(file.path, docIAL)
  168. treenode.IndexBlockTree(tree)
  169. sql.IndexTreeQueue(tree)
  170. util.IncBootProgress(bootProgressPart, fmt.Sprintf(Conf.Language(92), util.ShortPathForBootingDisplay(tree.Path)))
  171. if 1 < i && 0 == i%64 {
  172. util.PushStatusBar(fmt.Sprintf(Conf.Language(88), i, (len(files))-i))
  173. }
  174. })
  175. for _, file := range files {
  176. if file.isdir || !strings.HasSuffix(file.name, ".sy") {
  177. continue
  178. }
  179. waitGroup.Add(1)
  180. invokeErr := p.Invoke(file)
  181. if nil != invokeErr {
  182. logging.LogErrorf("invoke [%s] failed: %s", file.path, invokeErr)
  183. continue
  184. }
  185. }
  186. waitGroup.Wait()
  187. p.Release()
  188. // 关联数据库和块
  189. av.BatchUpsertBlockRel(avNodes)
  190. box.UpdateHistoryGenerated() // 初始化历史生成时间为当前时间
  191. end := time.Now()
  192. elapsed := end.Sub(start).Seconds()
  193. logging.LogInfof("rebuilt database for notebook [%s] in [%.2fs], tree [count=%d, size=%s]", box.ID, elapsed, treeCount, humanize.BytesCustomCeil(uint64(treeSize), 2))
  194. debug.FreeOSMemory()
  195. return
  196. }
  197. func IndexRefs() {
  198. start := time.Now()
  199. util.SetBootDetails("Resolving refs...")
  200. util.PushStatusBar(Conf.Language(54))
  201. util.SetBootDetails("Indexing refs...")
  202. var defBlockIDs []string
  203. luteEngine := util.NewLute()
  204. boxes := Conf.GetOpenedBoxes()
  205. for _, box := range boxes {
  206. pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32)
  207. for _, paths := range pages {
  208. for _, treeAbsPath := range paths {
  209. data, readErr := filelock.ReadFile(treeAbsPath)
  210. if nil != readErr {
  211. logging.LogWarnf("get data [path=%s] failed: %s", treeAbsPath, readErr)
  212. continue
  213. }
  214. if !bytes.Contains(data, []byte("TextMarkBlockRefID")) && !bytes.Contains(data, []byte("TextMarkFileAnnotationRefID")) {
  215. continue
  216. }
  217. p := filepath.ToSlash(strings.TrimPrefix(treeAbsPath, filepath.Join(util.DataDir, box.ID)))
  218. tree, parseErr := filesys.LoadTreeByData(data, box.ID, p, luteEngine)
  219. if nil != parseErr {
  220. logging.LogWarnf("parse json to tree [%s] failed: %s", treeAbsPath, parseErr)
  221. continue
  222. }
  223. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  224. if !entering {
  225. return ast.WalkContinue
  226. }
  227. if treenode.IsBlockRef(n) || treenode.IsFileAnnotationRef(n) {
  228. defBlockIDs = append(defBlockIDs, tree.Root.ID)
  229. }
  230. return ast.WalkContinue
  231. })
  232. }
  233. }
  234. }
  235. defBlockIDs = gulu.Str.RemoveDuplicatedElem(defBlockIDs)
  236. i := 0
  237. size := len(defBlockIDs)
  238. if 0 < size {
  239. bootProgressPart := int32(10.0 / float64(size))
  240. for _, defBlockID := range defBlockIDs {
  241. defTree, loadErr := LoadTreeByBlockID(defBlockID)
  242. if nil != loadErr {
  243. continue
  244. }
  245. util.IncBootProgress(bootProgressPart, "Indexing ref "+defTree.ID)
  246. sql.UpdateRefsTreeQueue(defTree)
  247. if 1 < i && 0 == i%64 {
  248. util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i))
  249. }
  250. i++
  251. }
  252. }
  253. logging.LogInfof("resolved refs [%d] in [%dms]", size, time.Now().Sub(start).Milliseconds())
  254. util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i))
  255. }
  256. var indexEmbedBlockLock = sync.Mutex{}
  257. // IndexEmbedBlockJob 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
  258. func IndexEmbedBlockJob() {
  259. task.AppendTaskWithTimeout(task.DatabaseIndexEmbedBlock, 30*time.Second, autoIndexEmbedBlock)
  260. }
  261. func autoIndexEmbedBlock() {
  262. indexEmbedBlockLock.Lock()
  263. defer indexEmbedBlockLock.Unlock()
  264. embedBlocks := sql.QueryEmptyContentEmbedBlocks()
  265. for i, embedBlock := range embedBlocks {
  266. markdown := strings.TrimSpace(embedBlock.Markdown)
  267. markdown = strings.TrimPrefix(markdown, "{{")
  268. stmt := strings.TrimSuffix(markdown, "}}")
  269. // 嵌入块的 Markdown 内容需要反转义
  270. stmt = html.UnescapeString(stmt)
  271. stmt = strings.ReplaceAll(stmt, editor.IALValEscNewLine, "\n")
  272. // 需要移除首尾的空白字符以判断是否具有 //!js 标记
  273. stmt = strings.TrimSpace(stmt)
  274. if strings.HasPrefix(stmt, "//!js") {
  275. // https://github.com/siyuan-note/siyuan/issues/9648
  276. // js 嵌入块不支持自动索引,由前端主动调用 /api/search/updateEmbedBlock 接口更新内容 https://github.com/siyuan-note/siyuan/issues/9736
  277. continue
  278. }
  279. if !strings.Contains(strings.ToLower(stmt), "select") {
  280. continue
  281. }
  282. queryResultBlocks := sql.SelectBlocksRawStmtNoParse(stmt, 102400)
  283. for _, block := range queryResultBlocks {
  284. embedBlock.Content += block.Content
  285. }
  286. if "" == embedBlock.Content {
  287. embedBlock.Content = "no query result"
  288. }
  289. sql.UpdateBlockContentQueue(embedBlock)
  290. if 63 <= i { // 一次任务中最多处理 64 个嵌入块,防止卡顿
  291. break
  292. }
  293. }
  294. }
  295. func updateEmbedBlockContent(embedBlockID string, queryResultBlocks []*EmbedBlock) {
  296. embedBlock := sql.GetBlock(embedBlockID)
  297. if nil == embedBlock {
  298. return
  299. }
  300. embedBlock.Content = "" // 嵌入块每查询一次多一个结果 https://github.com/siyuan-note/siyuan/issues/7196
  301. for _, block := range queryResultBlocks {
  302. embedBlock.Content += block.Block.Markdown
  303. }
  304. if "" == embedBlock.Content {
  305. embedBlock.Content = "no query result"
  306. }
  307. sql.UpdateBlockContentQueue(embedBlock)
  308. }
  309. func init() {
  310. subscribeSQLEvents()
  311. }
  312. func subscribeSQLEvents() {
  313. // 使用下面的 EvtSQLInsertBlocksFTS 就可以了
  314. //eventbus.Subscribe(eventbus.EvtSQLInsertBlocks, func(context map[string]interface{}, current, total, blockCount int, hash string) {
  315. // if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  316. // // Android/iOS 端不显示数据索引和搜索索引状态提示 https://github.com/siyuan-note/siyuan/issues/6392
  317. // return
  318. // }
  319. //
  320. // msg := fmt.Sprintf(Conf.Language(89), current, total, blockCount, hash)
  321. // util.SetBootDetails(msg)
  322. // util.ContextPushMsg(context, msg)
  323. //})
  324. eventbus.Subscribe(eventbus.EvtSQLInsertBlocksFTS, func(context map[string]interface{}, blockCount int, hash string) {
  325. if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  326. // Android/iOS 端不显示数据索引和搜索索引状态提示 https://github.com/siyuan-note/siyuan/issues/6392
  327. return
  328. }
  329. current := context["current"].(int)
  330. total := context["total"]
  331. msg := fmt.Sprintf(Conf.Language(90), current, total, blockCount, hash)
  332. util.SetBootDetails(msg)
  333. util.ContextPushMsg(context, msg)
  334. })
  335. eventbus.Subscribe(eventbus.EvtSQLDeleteBlocks, func(context map[string]interface{}, rootID string) {
  336. if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  337. return
  338. }
  339. current := context["current"].(int)
  340. total := context["total"]
  341. msg := fmt.Sprintf(Conf.Language(93), current, total, rootID)
  342. util.SetBootDetails(msg)
  343. util.ContextPushMsg(context, msg)
  344. })
  345. eventbus.Subscribe(eventbus.EvtSQLUpdateBlocksHPaths, func(context map[string]interface{}, blockCount int, hash string) {
  346. if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  347. return
  348. }
  349. current := context["current"].(int)
  350. total := context["total"]
  351. msg := fmt.Sprintf(Conf.Language(234), current, total, blockCount, hash)
  352. util.SetBootDetails(msg)
  353. util.ContextPushMsg(context, msg)
  354. })
  355. eventbus.Subscribe(eventbus.EvtSQLInsertHistory, func(context map[string]interface{}) {
  356. if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  357. return
  358. }
  359. current := context["current"].(int)
  360. total := context["total"]
  361. msg := fmt.Sprintf(Conf.Language(191), current, total)
  362. util.SetBootDetails(msg)
  363. util.ContextPushMsg(context, msg)
  364. })
  365. eventbus.Subscribe(eventbus.EvtSQLInsertAssetContent, func(context map[string]interface{}) {
  366. if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container {
  367. return
  368. }
  369. current := context["current"].(int)
  370. total := context["total"]
  371. msg := fmt.Sprintf(Conf.Language(217), current, total)
  372. util.SetBootDetails(msg)
  373. util.ContextPushMsg(context, msg)
  374. })
  375. }