index_fix.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package model
  17. import (
  18. "fmt"
  19. "os"
  20. "path"
  21. "path/filepath"
  22. "runtime/debug"
  23. "strings"
  24. "sync"
  25. "sync/atomic"
  26. "time"
  27. "github.com/88250/gulu"
  28. "github.com/88250/lute"
  29. "github.com/88250/lute/ast"
  30. "github.com/88250/lute/html"
  31. "github.com/88250/lute/parse"
  32. "github.com/siyuan-note/filelock"
  33. "github.com/siyuan-note/logging"
  34. "github.com/siyuan-note/siyuan/kernel/filesys"
  35. "github.com/siyuan-note/siyuan/kernel/sql"
  36. "github.com/siyuan-note/siyuan/kernel/task"
  37. "github.com/siyuan-note/siyuan/kernel/treenode"
  38. "github.com/siyuan-note/siyuan/kernel/util"
  39. )
  40. var (
  41. checkIndexPerformed = atomic.Bool{}
  42. )
  43. // checkIndex 自动校验数据库索引,仅在数据同步执行完成后执行一次。
  44. func checkIndex() {
  45. if checkIndexPerformed.Load() {
  46. return
  47. }
  48. logging.LogInfof("start checking index...")
  49. task.AppendTask(task.DatabaseIndexFix, removeDuplicateDatabaseIndex)
  50. sql.WaitForWritingDatabase()
  51. task.AppendTask(task.DatabaseIndexFix, resetDuplicateBlocksOnFileSys)
  52. task.AppendTask(task.DatabaseIndexFix, fixBlockTreeByFileSys)
  53. sql.WaitForWritingDatabase()
  54. task.AppendTask(task.DatabaseIndexFix, fixDatabaseIndexByBlockTree)
  55. sql.WaitForWritingDatabase()
  56. task.AppendTask(task.DatabaseIndexFix, removeDuplicateDatabaseRefs)
  57. // 后面要加任务的话记得修改推送任务栏的进度 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
  58. task.AppendTask(task.DatabaseIndexFix, func() {
  59. util.PushStatusBar(Conf.Language(185))
  60. })
  61. debug.FreeOSMemory()
  62. logging.LogInfof("finish checking index")
  63. checkIndexPerformed.Store(true)
  64. }
  65. var autoFixLock = sync.Mutex{}
  66. // removeDuplicateDatabaseRefs 删除重复的数据库引用关系。
  67. func removeDuplicateDatabaseRefs() {
  68. defer logging.Recover()
  69. autoFixLock.Lock()
  70. defer autoFixLock.Unlock()
  71. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 5, 5))
  72. duplicatedRootIDs := sql.GetRefDuplicatedDefRootIDs()
  73. for _, rootID := range duplicatedRootIDs {
  74. refreshRefsByDefID(rootID)
  75. }
  76. for _, rootID := range duplicatedRootIDs {
  77. logging.LogWarnf("exist more than one ref duplicated [%s], reindex it", rootID)
  78. }
  79. }
  80. // removeDuplicateDatabaseIndex 删除重复的数据库索引。
  81. func removeDuplicateDatabaseIndex() {
  82. defer logging.Recover()
  83. autoFixLock.Lock()
  84. defer autoFixLock.Unlock()
  85. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
  86. duplicatedRootIDs := sql.GetDuplicatedRootIDs("blocks")
  87. if 1 > len(duplicatedRootIDs) {
  88. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts")
  89. if 1 > len(duplicatedRootIDs) && !Conf.Search.CaseSensitive {
  90. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts_case_insensitive")
  91. }
  92. }
  93. roots := sql.GetBlocks(duplicatedRootIDs)
  94. rootMap := map[string]*sql.Block{}
  95. for _, root := range roots {
  96. if nil == root {
  97. continue
  98. }
  99. rootMap[root.ID] = root
  100. }
  101. var toRemoveRootIDs []string
  102. var deletes int
  103. for _, rootID := range duplicatedRootIDs {
  104. root := rootMap[rootID]
  105. if nil == root {
  106. continue
  107. }
  108. deletes++
  109. toRemoveRootIDs = append(toRemoveRootIDs, rootID)
  110. if util.IsExiting.Load() {
  111. break
  112. }
  113. }
  114. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  115. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  116. if 0 < deletes {
  117. logging.LogWarnf("exist more than one tree duplicated [%d], reindex it", deletes)
  118. }
  119. }
  120. // resetDuplicateBlocksOnFileSys 重置重复 ID 的块。 https://github.com/siyuan-note/siyuan/issues/7357
  121. func resetDuplicateBlocksOnFileSys() {
  122. defer logging.Recover()
  123. autoFixLock.Lock()
  124. defer autoFixLock.Unlock()
  125. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 2, 5))
  126. boxes := Conf.GetBoxes()
  127. luteEngine := lute.New()
  128. blockIDs := map[string]bool{}
  129. needRefreshUI := false
  130. for _, box := range boxes {
  131. // 校验索引阶段自动删除历史遗留的笔记本 history 文件夹
  132. legacyHistory := filepath.Join(util.DataDir, box.ID, ".siyuan", "history")
  133. if gulu.File.IsDir(legacyHistory) {
  134. if removeErr := os.RemoveAll(legacyHistory); nil != removeErr {
  135. logging.LogErrorf("remove legacy history failed: %s", removeErr)
  136. } else {
  137. logging.LogInfof("removed legacy history [%s]", legacyHistory)
  138. }
  139. }
  140. boxPath := filepath.Join(util.DataDir, box.ID)
  141. var duplicatedTrees []*parse.Tree
  142. filelock.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  143. if nil == info {
  144. return nil
  145. }
  146. if info.IsDir() {
  147. if boxPath == path {
  148. // 跳过笔记本文件夹
  149. return nil
  150. }
  151. if strings.HasPrefix(info.Name(), ".") {
  152. return filepath.SkipDir
  153. }
  154. if !ast.IsNodeIDPattern(info.Name()) {
  155. return nil
  156. }
  157. return nil
  158. }
  159. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  160. return nil
  161. }
  162. if !ast.IsNodeIDPattern(strings.TrimSuffix(info.Name(), ".sy")) {
  163. logging.LogWarnf("invalid .sy file name [%s]", path)
  164. box.moveCorruptedData(path)
  165. return nil
  166. }
  167. p := path[len(boxPath):]
  168. p = filepath.ToSlash(p)
  169. tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine)
  170. if nil != loadErr {
  171. logging.LogErrorf("load tree [%s] failed: %s", p, loadErr)
  172. return nil
  173. }
  174. needOverwrite := false
  175. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  176. if !entering || !n.IsBlock() {
  177. return ast.WalkContinue
  178. }
  179. if "" == n.ID {
  180. needOverwrite = true
  181. n.ID = ast.NewNodeID()
  182. n.SetIALAttr("id", n.ID)
  183. return ast.WalkContinue
  184. }
  185. if !blockIDs[n.ID] {
  186. blockIDs[n.ID] = true
  187. return ast.WalkContinue
  188. }
  189. // 存在重复的块 ID
  190. if ast.NodeDocument == n.Type {
  191. // 如果是文档根节点,则重置这颗树
  192. // 这里不能在迭代中重置,因为如果这个文档存在子文档的话,重置时会重命名子文档文件夹,后续迭代可能会导致子文档 ID 重复
  193. duplicatedTrees = append(duplicatedTrees, tree)
  194. return ast.WalkStop
  195. }
  196. // 其他情况,重置节点 ID
  197. needOverwrite = true
  198. n.ID = ast.NewNodeID()
  199. n.SetIALAttr("id", n.ID)
  200. needRefreshUI = true
  201. return ast.WalkContinue
  202. })
  203. if needOverwrite {
  204. logging.LogWarnf("exist more than one node with the same id in tree [%s], reset it", box.ID+p)
  205. if writeErr := filesys.WriteTree(tree); nil != writeErr {
  206. logging.LogErrorf("write tree [%s] failed: %s", p, writeErr)
  207. }
  208. }
  209. return nil
  210. })
  211. for _, tree := range duplicatedTrees {
  212. absPath := filepath.Join(boxPath, tree.Path)
  213. logging.LogWarnf("exist more than one tree with the same id [%s], reset it", absPath)
  214. recreateTree(tree, absPath)
  215. needRefreshUI = true
  216. }
  217. }
  218. if needRefreshUI {
  219. util.ReloadUI()
  220. go func() {
  221. time.Sleep(time.Second * 3)
  222. util.PushMsg(Conf.Language(190), 5000)
  223. }()
  224. }
  225. }
  226. func recreateTree(tree *parse.Tree, absPath string) {
  227. // 删除关于该树的所有块树数据,后面会调用 fixBlockTreeByFileSys() 进行订正补全
  228. treenode.RemoveBlockTreesByPathPrefix(strings.TrimSuffix(tree.Path, ".sy"))
  229. treenode.RemoveBlockTreesByRootID(tree.ID)
  230. resetTree(tree, "")
  231. if err := filesys.WriteTree(tree); nil != err {
  232. logging.LogWarnf("write tree [%s] failed: %s", tree.Path, err)
  233. return
  234. }
  235. if gulu.File.IsDir(strings.TrimSuffix(absPath, ".sy")) {
  236. // 重命名子文档文件夹
  237. from := strings.TrimSuffix(absPath, ".sy")
  238. to := filepath.Join(filepath.Dir(absPath), tree.ID)
  239. if renameErr := os.Rename(from, to); nil != renameErr {
  240. logging.LogWarnf("rename [%s] failed: %s", from, renameErr)
  241. return
  242. }
  243. }
  244. if err := filelock.Remove(absPath); nil != err {
  245. logging.LogWarnf("remove [%s] failed: %s", absPath, err)
  246. return
  247. }
  248. }
  249. // fixBlockTreeByFileSys 通过文件系统订正块树。
  250. func fixBlockTreeByFileSys() {
  251. defer logging.Recover()
  252. autoFixLock.Lock()
  253. defer autoFixLock.Unlock()
  254. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 3, 5))
  255. boxes := Conf.GetOpenedBoxes()
  256. luteEngine := lute.New()
  257. for _, box := range boxes {
  258. boxPath := filepath.Join(util.DataDir, box.ID)
  259. var paths []string
  260. filelock.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  261. if boxPath == path {
  262. // 跳过根路径(笔记本文件夹)
  263. return nil
  264. }
  265. if nil == info {
  266. return nil
  267. }
  268. if info.IsDir() {
  269. if strings.HasPrefix(info.Name(), ".") {
  270. return filepath.SkipDir
  271. }
  272. return nil
  273. }
  274. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  275. return nil
  276. }
  277. p := path[len(boxPath):]
  278. p = filepath.ToSlash(p)
  279. paths = append(paths, p)
  280. return nil
  281. })
  282. size := len(paths)
  283. // 清理块树中的冗余数据
  284. treenode.ClearRedundantBlockTrees(box.ID, paths)
  285. // 重新索引缺失的块树
  286. missingPaths := treenode.GetNotExistPaths(box.ID, paths)
  287. for i, p := range missingPaths {
  288. id := path.Base(p)
  289. id = strings.TrimSuffix(id, ".sy")
  290. if !ast.IsNodeIDPattern(id) {
  291. continue
  292. }
  293. reindexTreeByPath(box.ID, p, i, size, luteEngine)
  294. if util.IsExiting.Load() {
  295. break
  296. }
  297. }
  298. if util.IsExiting.Load() {
  299. break
  300. }
  301. }
  302. // 清理已关闭的笔记本块树
  303. boxes = Conf.GetClosedBoxes()
  304. for _, box := range boxes {
  305. treenode.RemoveBlockTreesByBoxID(box.ID)
  306. }
  307. }
  308. // fixDatabaseIndexByBlockTree 通过块树订正数据库索引。
  309. func fixDatabaseIndexByBlockTree() {
  310. defer logging.Recover()
  311. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 4, 5))
  312. rootUpdatedMap := treenode.GetRootUpdated()
  313. dbRootUpdatedMap, err := sql.GetRootUpdated()
  314. if nil == err {
  315. reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap)
  316. }
  317. }
  318. func reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap map[string]string) {
  319. i := -1
  320. size := len(rootUpdatedMap)
  321. luteEngine := util.NewLute()
  322. for rootID, updated := range rootUpdatedMap {
  323. i++
  324. if util.IsExiting.Load() {
  325. break
  326. }
  327. rootUpdated := dbRootUpdatedMap[rootID]
  328. if "" == rootUpdated {
  329. //logging.LogWarnf("not found tree [%s] in database, reindex it", rootID)
  330. reindexTree(rootID, i, size, luteEngine)
  331. continue
  332. }
  333. if "" == updated {
  334. // BlockTree 迁移,v2.6.3 之前没有 updated 字段
  335. reindexTree(rootID, i, size, luteEngine)
  336. continue
  337. }
  338. btUpdated, _ := time.Parse("20060102150405", updated)
  339. dbUpdated, _ := time.Parse("20060102150405", rootUpdated)
  340. if dbUpdated.Before(btUpdated.Add(-10 * time.Minute)) {
  341. logging.LogWarnf("tree [%s] is not up to date, reindex it", rootID)
  342. reindexTree(rootID, i, size, luteEngine)
  343. continue
  344. }
  345. if util.IsExiting.Load() {
  346. break
  347. }
  348. }
  349. var rootIDs []string
  350. for rootID := range dbRootUpdatedMap {
  351. if _, ok := rootUpdatedMap[rootID]; !ok {
  352. rootIDs = append(rootIDs, rootID)
  353. }
  354. if util.IsExiting.Load() {
  355. break
  356. }
  357. }
  358. rootIDs = gulu.Str.RemoveDuplicatedElem(rootIDs)
  359. roots := map[string]*sql.Block{}
  360. blocks := sql.GetBlocks(rootIDs)
  361. for _, block := range blocks {
  362. roots[block.RootID] = block
  363. }
  364. var toRemoveRootIDs []string
  365. for id, root := range roots {
  366. if nil == root {
  367. continue
  368. }
  369. toRemoveRootIDs = append(toRemoveRootIDs, id)
  370. if util.IsExiting.Load() {
  371. break
  372. }
  373. }
  374. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  375. //logging.LogWarnf("tree [%s] is not in block tree, remove it from [%s]", id, root.Box)
  376. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  377. }
  378. func reindexTreeByPath(box, p string, i, size int, luteEngine *lute.Lute) {
  379. tree, err := filesys.LoadTree(box, p, luteEngine)
  380. if nil != err {
  381. return
  382. }
  383. reindexTree0(tree, i, size)
  384. }
  385. func reindexTree(rootID string, i, size int, luteEngine *lute.Lute) {
  386. root := treenode.GetBlockTree(rootID)
  387. if nil == root {
  388. logging.LogWarnf("root block [%s] not found", rootID)
  389. return
  390. }
  391. tree, err := filesys.LoadTree(root.BoxID, root.Path, luteEngine)
  392. if nil != err {
  393. if os.IsNotExist(err) {
  394. // 文件系统上没有找到该 .sy 文件,则订正块树
  395. treenode.RemoveBlockTreesByRootID(rootID)
  396. }
  397. return
  398. }
  399. reindexTree0(tree, i, size)
  400. }
  401. func reindexTree0(tree *parse.Tree, i, size int) {
  402. updated := tree.Root.IALAttr("updated")
  403. if "" == updated {
  404. updated = util.TimeFromID(tree.Root.ID)
  405. tree.Root.SetIALAttr("updated", updated)
  406. indexWriteTreeUpsertQueue(tree)
  407. } else {
  408. treenode.IndexBlockTree(tree)
  409. sql.IndexTreeQueue(tree)
  410. }
  411. if 0 == i%64 {
  412. util.PushStatusBar(fmt.Sprintf(Conf.Language(183), i, size, html.EscapeString(path.Base(tree.HPath))))
  413. }
  414. }