index_fix.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package model
import (
	"errors"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"runtime/debug"
	"strings"
	"sync"
	"time"

	"github.com/88250/gulu"
	"github.com/88250/lute"
	"github.com/88250/lute/ast"
	"github.com/88250/lute/html"
	"github.com/88250/lute/parse"
	"github.com/siyuan-note/filelock"
	"github.com/siyuan-note/logging"
	"github.com/siyuan-note/siyuan/kernel/filesys"
	"github.com/siyuan-note/siyuan/kernel/sql"
	"github.com/siyuan-note/siyuan/kernel/task"
	"github.com/siyuan-note/siyuan/kernel/treenode"
	"github.com/siyuan-note/siyuan/kernel/util"
)
  39. var (
  40. checkIndexOnce = sync.Once{}
  41. )
  42. // checkIndex 自动校验数据库索引,仅在数据同步执行完成后执行一次。
  43. func checkIndex() {
  44. checkIndexOnce.Do(func() {
  45. logging.LogInfof("start checking index...")
  46. task.AppendTask(task.DatabaseIndexFix, removeDuplicateDatabaseIndex)
  47. sql.WaitForWritingDatabase()
  48. task.AppendTask(task.DatabaseIndexFix, resetDuplicateBlocksOnFileSys)
  49. task.AppendTask(task.DatabaseIndexFix, fixBlockTreeByFileSys)
  50. sql.WaitForWritingDatabase()
  51. task.AppendTask(task.DatabaseIndexFix, fixDatabaseIndexByBlockTree)
  52. sql.WaitForWritingDatabase()
  53. task.AppendTask(task.DatabaseIndexFix, removeDuplicateDatabaseRefs)
  54. // 后面要加任务的话记得修改推送任务栏的进度 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
  55. task.AppendTask(task.DatabaseIndexFix, func() {
  56. util.PushStatusBar(Conf.Language(185))
  57. })
  58. debug.FreeOSMemory()
  59. logging.LogInfof("finish checking index")
  60. })
  61. }
  62. var autoFixLock = sync.Mutex{}
  63. // removeDuplicateDatabaseRefs 删除重复的数据库引用关系。
  64. func removeDuplicateDatabaseRefs() {
  65. defer logging.Recover()
  66. autoFixLock.Lock()
  67. defer autoFixLock.Unlock()
  68. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 5, 5))
  69. duplicatedRootIDs := sql.GetRefDuplicatedDefRootIDs()
  70. for _, rootID := range duplicatedRootIDs {
  71. refreshRefsByDefID(rootID)
  72. }
  73. for _, rootID := range duplicatedRootIDs {
  74. logging.LogWarnf("exist more than one ref duplicated [%s], reindex it", rootID)
  75. }
  76. }
  77. // removeDuplicateDatabaseIndex 删除重复的数据库索引。
  78. func removeDuplicateDatabaseIndex() {
  79. defer logging.Recover()
  80. autoFixLock.Lock()
  81. defer autoFixLock.Unlock()
  82. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
  83. duplicatedRootIDs := sql.GetDuplicatedRootIDs("blocks")
  84. if 1 > len(duplicatedRootIDs) {
  85. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts")
  86. if 1 > len(duplicatedRootIDs) && !Conf.Search.CaseSensitive {
  87. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts_case_insensitive")
  88. }
  89. }
  90. roots := sql.GetBlocks(duplicatedRootIDs)
  91. rootMap := map[string]*sql.Block{}
  92. for _, root := range roots {
  93. if nil == root {
  94. continue
  95. }
  96. rootMap[root.ID] = root
  97. }
  98. var toRemoveRootIDs []string
  99. var deletes int
  100. for _, rootID := range duplicatedRootIDs {
  101. root := rootMap[rootID]
  102. if nil == root {
  103. continue
  104. }
  105. deletes++
  106. toRemoveRootIDs = append(toRemoveRootIDs, rootID)
  107. if util.IsExiting.Load() {
  108. break
  109. }
  110. }
  111. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  112. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  113. if 0 < deletes {
  114. logging.LogWarnf("exist more than one tree duplicated [%d], reindex it", deletes)
  115. }
  116. }
  117. // resetDuplicateBlocksOnFileSys 重置重复 ID 的块。 https://github.com/siyuan-note/siyuan/issues/7357
  118. func resetDuplicateBlocksOnFileSys() {
  119. defer logging.Recover()
  120. autoFixLock.Lock()
  121. defer autoFixLock.Unlock()
  122. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 2, 5))
  123. boxes := Conf.GetBoxes()
  124. luteEngine := lute.New()
  125. blockIDs := map[string]bool{}
  126. needRefreshUI := false
  127. for _, box := range boxes {
  128. // 校验索引阶段自动删除历史遗留的笔记本 history 文件夹
  129. legacyHistory := filepath.Join(util.DataDir, box.ID, ".siyuan", "history")
  130. if gulu.File.IsDir(legacyHistory) {
  131. if removeErr := os.RemoveAll(legacyHistory); nil != removeErr {
  132. logging.LogErrorf("remove legacy history failed: %s", removeErr)
  133. } else {
  134. logging.LogInfof("removed legacy history [%s]", legacyHistory)
  135. }
  136. }
  137. boxPath := filepath.Join(util.DataDir, box.ID)
  138. var duplicatedTrees []*parse.Tree
  139. filelock.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  140. if nil == info {
  141. return nil
  142. }
  143. if info.IsDir() {
  144. if boxPath == path {
  145. // 跳过笔记本文件夹
  146. return nil
  147. }
  148. if strings.HasPrefix(info.Name(), ".") {
  149. return filepath.SkipDir
  150. }
  151. if !ast.IsNodeIDPattern(info.Name()) {
  152. return nil
  153. }
  154. return nil
  155. }
  156. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  157. return nil
  158. }
  159. if !ast.IsNodeIDPattern(strings.TrimSuffix(info.Name(), ".sy")) {
  160. logging.LogWarnf("invalid .sy file name [%s]", path)
  161. box.moveCorruptedData(path)
  162. return nil
  163. }
  164. p := path[len(boxPath):]
  165. p = filepath.ToSlash(p)
  166. tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine)
  167. if nil != loadErr {
  168. logging.LogErrorf("load tree [%s] failed: %s", p, loadErr)
  169. return nil
  170. }
  171. needOverwrite := false
  172. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  173. if !entering || !n.IsBlock() {
  174. return ast.WalkContinue
  175. }
  176. if "" == n.ID {
  177. needOverwrite = true
  178. n.ID = ast.NewNodeID()
  179. n.SetIALAttr("id", n.ID)
  180. return ast.WalkContinue
  181. }
  182. if !blockIDs[n.ID] {
  183. blockIDs[n.ID] = true
  184. return ast.WalkContinue
  185. }
  186. // 存在重复的块 ID
  187. if ast.NodeDocument == n.Type {
  188. // 如果是文档根节点,则重置这颗树
  189. // 这里不能在迭代中重置,因为如果这个文档存在子文档的话,重置时会重命名子文档文件夹,后续迭代可能会导致子文档 ID 重复
  190. duplicatedTrees = append(duplicatedTrees, tree)
  191. return ast.WalkStop
  192. }
  193. // 其他情况,重置节点 ID
  194. needOverwrite = true
  195. n.ID = ast.NewNodeID()
  196. n.SetIALAttr("id", n.ID)
  197. needRefreshUI = true
  198. return ast.WalkContinue
  199. })
  200. if needOverwrite {
  201. logging.LogWarnf("exist more than one node with the same id in tree [%s], reset it", box.ID+p)
  202. if writeErr := filesys.WriteTree(tree); nil != writeErr {
  203. logging.LogErrorf("write tree [%s] failed: %s", p, writeErr)
  204. }
  205. }
  206. return nil
  207. })
  208. for _, tree := range duplicatedTrees {
  209. absPath := filepath.Join(boxPath, tree.Path)
  210. logging.LogWarnf("exist more than one tree with the same id [%s], reset it", absPath)
  211. recreateTree(tree, absPath)
  212. needRefreshUI = true
  213. }
  214. }
  215. if needRefreshUI {
  216. util.ReloadUI()
  217. go func() {
  218. time.Sleep(time.Second * 3)
  219. util.PushMsg(Conf.Language(190), 5000)
  220. }()
  221. }
  222. }
  223. func recreateTree(tree *parse.Tree, absPath string) {
  224. // 删除关于该树的所有块树数据,后面会调用 fixBlockTreeByFileSys() 进行订正补全
  225. treenode.RemoveBlockTreesByPathPrefix(strings.TrimSuffix(tree.Path, ".sy"))
  226. treenode.RemoveBlockTreesByRootID(tree.ID)
  227. resetTree(tree, "")
  228. if err := filesys.WriteTree(tree); nil != err {
  229. logging.LogWarnf("write tree [%s] failed: %s", tree.Path, err)
  230. return
  231. }
  232. if gulu.File.IsDir(strings.TrimSuffix(absPath, ".sy")) {
  233. // 重命名子文档文件夹
  234. from := strings.TrimSuffix(absPath, ".sy")
  235. to := filepath.Join(filepath.Dir(absPath), tree.ID)
  236. if renameErr := os.Rename(from, to); nil != renameErr {
  237. logging.LogWarnf("rename [%s] failed: %s", from, renameErr)
  238. return
  239. }
  240. }
  241. if err := filelock.Remove(absPath); nil != err {
  242. logging.LogWarnf("remove [%s] failed: %s", absPath, err)
  243. return
  244. }
  245. }
  246. // fixBlockTreeByFileSys 通过文件系统订正块树。
  247. func fixBlockTreeByFileSys() {
  248. defer logging.Recover()
  249. autoFixLock.Lock()
  250. defer autoFixLock.Unlock()
  251. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 3, 5))
  252. boxes := Conf.GetOpenedBoxes()
  253. luteEngine := lute.New()
  254. for _, box := range boxes {
  255. boxPath := filepath.Join(util.DataDir, box.ID)
  256. var paths []string
  257. filelock.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  258. if boxPath == path {
  259. // 跳过根路径(笔记本文件夹)
  260. return nil
  261. }
  262. if nil == info {
  263. return nil
  264. }
  265. if info.IsDir() {
  266. if strings.HasPrefix(info.Name(), ".") {
  267. return filepath.SkipDir
  268. }
  269. return nil
  270. }
  271. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  272. return nil
  273. }
  274. p := path[len(boxPath):]
  275. p = filepath.ToSlash(p)
  276. paths = append(paths, p)
  277. return nil
  278. })
  279. size := len(paths)
  280. // 清理块树中的冗余数据
  281. treenode.ClearRedundantBlockTrees(box.ID, paths)
  282. // 重新索引缺失的块树
  283. missingPaths := treenode.GetNotExistPaths(box.ID, paths)
  284. for i, p := range missingPaths {
  285. id := path.Base(p)
  286. id = strings.TrimSuffix(id, ".sy")
  287. if !ast.IsNodeIDPattern(id) {
  288. continue
  289. }
  290. reindexTreeByPath(box.ID, p, i, size, luteEngine)
  291. if util.IsExiting.Load() {
  292. break
  293. }
  294. }
  295. if util.IsExiting.Load() {
  296. break
  297. }
  298. }
  299. // 清理已关闭的笔记本块树
  300. boxes = Conf.GetClosedBoxes()
  301. for _, box := range boxes {
  302. treenode.RemoveBlockTreesByBoxID(box.ID)
  303. }
  304. }
  305. // fixDatabaseIndexByBlockTree 通过块树订正数据库索引。
  306. func fixDatabaseIndexByBlockTree() {
  307. defer logging.Recover()
  308. util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 4, 5))
  309. rootUpdatedMap := treenode.GetRootUpdated()
  310. dbRootUpdatedMap, err := sql.GetRootUpdated()
  311. if nil == err {
  312. reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap)
  313. }
  314. }
  315. func reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap map[string]string) {
  316. i := -1
  317. size := len(rootUpdatedMap)
  318. luteEngine := util.NewLute()
  319. for rootID, updated := range rootUpdatedMap {
  320. i++
  321. if util.IsExiting.Load() {
  322. break
  323. }
  324. rootUpdated := dbRootUpdatedMap[rootID]
  325. if "" == rootUpdated {
  326. //logging.LogWarnf("not found tree [%s] in database, reindex it", rootID)
  327. reindexTree(rootID, i, size, luteEngine)
  328. continue
  329. }
  330. if "" == updated {
  331. // BlockTree 迁移,v2.6.3 之前没有 updated 字段
  332. reindexTree(rootID, i, size, luteEngine)
  333. continue
  334. }
  335. btUpdated, _ := time.Parse("20060102150405", updated)
  336. dbUpdated, _ := time.Parse("20060102150405", rootUpdated)
  337. if dbUpdated.Before(btUpdated.Add(-10 * time.Minute)) {
  338. logging.LogWarnf("tree [%s] is not up to date, reindex it", rootID)
  339. reindexTree(rootID, i, size, luteEngine)
  340. continue
  341. }
  342. if util.IsExiting.Load() {
  343. break
  344. }
  345. }
  346. var rootIDs []string
  347. for rootID := range dbRootUpdatedMap {
  348. if _, ok := rootUpdatedMap[rootID]; !ok {
  349. rootIDs = append(rootIDs, rootID)
  350. }
  351. if util.IsExiting.Load() {
  352. break
  353. }
  354. }
  355. rootIDs = gulu.Str.RemoveDuplicatedElem(rootIDs)
  356. roots := map[string]*sql.Block{}
  357. blocks := sql.GetBlocks(rootIDs)
  358. for _, block := range blocks {
  359. roots[block.RootID] = block
  360. }
  361. var toRemoveRootIDs []string
  362. for id, root := range roots {
  363. if nil == root {
  364. continue
  365. }
  366. toRemoveRootIDs = append(toRemoveRootIDs, id)
  367. if util.IsExiting.Load() {
  368. break
  369. }
  370. }
  371. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  372. //logging.LogWarnf("tree [%s] is not in block tree, remove it from [%s]", id, root.Box)
  373. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  374. }
  375. func reindexTreeByPath(box, p string, i, size int, luteEngine *lute.Lute) {
  376. tree, err := filesys.LoadTree(box, p, luteEngine)
  377. if nil != err {
  378. return
  379. }
  380. reindexTree0(tree, i, size)
  381. }
  382. func reindexTree(rootID string, i, size int, luteEngine *lute.Lute) {
  383. root := treenode.GetBlockTree(rootID)
  384. if nil == root {
  385. logging.LogWarnf("root block [%s] not found", rootID)
  386. return
  387. }
  388. tree, err := filesys.LoadTree(root.BoxID, root.Path, luteEngine)
  389. if nil != err {
  390. if os.IsNotExist(err) {
  391. // 文件系统上没有找到该 .sy 文件,则订正块树
  392. treenode.RemoveBlockTreesByRootID(rootID)
  393. }
  394. return
  395. }
  396. reindexTree0(tree, i, size)
  397. }
  398. func reindexTree0(tree *parse.Tree, i, size int) {
  399. updated := tree.Root.IALAttr("updated")
  400. if "" == updated {
  401. updated = util.TimeFromID(tree.Root.ID)
  402. tree.Root.SetIALAttr("updated", updated)
  403. indexWriteTreeUpsertQueue(tree)
  404. } else {
  405. treenode.IndexBlockTree(tree)
  406. sql.IndexTreeQueue(tree)
  407. }
  408. if 0 == i%64 {
  409. util.PushStatusBar(fmt.Sprintf(Conf.Language(183), i, size, html.EscapeString(path.Base(tree.HPath))))
  410. }
  411. }