index_fix.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package model
  17. import (
  18. "fmt"
  19. "os"
  20. "path"
  21. "path/filepath"
  22. "runtime/debug"
  23. "strings"
  24. "sync"
  25. "time"
  26. "github.com/88250/gulu"
  27. "github.com/88250/lute"
  28. "github.com/88250/lute/ast"
  29. "github.com/88250/lute/html"
  30. "github.com/88250/lute/parse"
  31. "github.com/siyuan-note/logging"
  32. "github.com/siyuan-note/siyuan/kernel/filesys"
  33. "github.com/siyuan-note/siyuan/kernel/sql"
  34. "github.com/siyuan-note/siyuan/kernel/task"
  35. "github.com/siyuan-note/siyuan/kernel/treenode"
  36. "github.com/siyuan-note/siyuan/kernel/util"
  37. )
  38. // FixIndexJob 自动校验数据库索引 https://github.com/siyuan-note/siyuan/issues/7016
  39. func FixIndexJob() {
  40. task.AppendTask(task.DatabaseIndexFix, removeDuplicateDatabaseIndex)
  41. sql.WaitForWritingDatabase()
  42. task.AppendTask(task.DatabaseIndexFix, resetDuplicateBlocksOnFileSys)
  43. task.AppendTask(task.DatabaseIndexFix, fixBlockTreeByFileSys)
  44. sql.WaitForWritingDatabase()
  45. task.AppendTask(task.DatabaseIndexFix, fixDatabaseIndexByBlockTree)
  46. sql.WaitForWritingDatabase()
  47. util.PushStatusBar(Conf.Language(185))
  48. debug.FreeOSMemory()
  49. }
  // autoFixLock is locked for the whole run of removeDuplicateDatabaseIndex,
  // resetDuplicateBlocksOnFileSys and fixBlockTreeByFileSys, so these steps never
  // overlap. The zero value of sync.Mutex is an unlocked mutex.
  var autoFixLock sync.Mutex
  51. // removeDuplicateDatabaseIndex 删除重复的数据库索引。
  52. func removeDuplicateDatabaseIndex() {
  53. defer logging.Recover()
  54. autoFixLock.Lock()
  55. defer autoFixLock.Unlock()
  56. util.PushStatusBar(Conf.Language(58))
  57. duplicatedRootIDs := sql.GetDuplicatedRootIDs("blocks")
  58. if 1 > len(duplicatedRootIDs) {
  59. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts")
  60. if 1 > len(duplicatedRootIDs) && !Conf.Search.CaseSensitive {
  61. duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts_case_insensitive")
  62. }
  63. }
  64. util.PushStatusBar(Conf.Language(58))
  65. roots := sql.GetBlocks(duplicatedRootIDs)
  66. rootMap := map[string]*sql.Block{}
  67. for _, root := range roots {
  68. if nil == root {
  69. continue
  70. }
  71. rootMap[root.ID] = root
  72. }
  73. var toRemoveRootIDs []string
  74. var deletes int
  75. for _, rootID := range duplicatedRootIDs {
  76. root := rootMap[rootID]
  77. if nil == root {
  78. continue
  79. }
  80. deletes++
  81. toRemoveRootIDs = append(toRemoveRootIDs, rootID)
  82. if util.IsExiting {
  83. break
  84. }
  85. }
  86. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  87. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  88. if 0 < deletes {
  89. logging.LogWarnf("exist more than one tree duplicated [%d], reindex it", deletes)
  90. }
  91. }
  92. // resetDuplicateBlocksOnFileSys 重置重复 ID 的块。 https://github.com/siyuan-note/siyuan/issues/7357
  93. func resetDuplicateBlocksOnFileSys() {
  94. defer logging.Recover()
  95. autoFixLock.Lock()
  96. defer autoFixLock.Unlock()
  97. util.PushStatusBar(Conf.Language(58))
  98. boxes := Conf.GetBoxes()
  99. luteEngine := lute.New()
  100. blockIDs := map[string]bool{}
  101. needRefreshUI := false
  102. for _, box := range boxes {
  103. // 校验索引阶段自动删除历史遗留的笔记本 history 文件夹
  104. legacyHistory := filepath.Join(util.DataDir, box.ID, ".siyuan", "history")
  105. if gulu.File.IsDir(legacyHistory) {
  106. if removeErr := os.RemoveAll(legacyHistory); nil != removeErr {
  107. logging.LogErrorf("remove legacy history failed: %s", removeErr)
  108. } else {
  109. logging.LogInfof("removed legacy history [%s]", legacyHistory)
  110. }
  111. }
  112. boxPath := filepath.Join(util.DataDir, box.ID)
  113. var duplicatedTrees []*parse.Tree
  114. filepath.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  115. if info.IsDir() {
  116. if boxPath == path {
  117. // 跳过根路径(笔记本文件夹)
  118. return nil
  119. }
  120. if strings.HasPrefix(info.Name(), ".") {
  121. return filepath.SkipDir
  122. }
  123. if !ast.IsNodeIDPattern(info.Name()) {
  124. return nil
  125. }
  126. if util.IsEmptyDir(filepath.Join(path)) {
  127. // 删除空的子文档文件夹
  128. if removeErr := os.RemoveAll(path); nil != removeErr {
  129. logging.LogErrorf("remove empty folder failed: %s", removeErr)
  130. }
  131. return nil
  132. }
  133. return nil
  134. }
  135. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  136. return nil
  137. }
  138. if !ast.IsNodeIDPattern(strings.TrimSuffix(info.Name(), ".sy")) {
  139. logging.LogWarnf("invalid .sy file name [%s]", path)
  140. box.moveCorruptedData(path)
  141. return nil
  142. }
  143. p := path[len(boxPath):]
  144. p = filepath.ToSlash(p)
  145. tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine)
  146. if nil != loadErr {
  147. logging.LogErrorf("load tree [%s] failed: %s", p, loadErr)
  148. return nil
  149. }
  150. needOverwrite := false
  151. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  152. if !entering || !n.IsBlock() {
  153. return ast.WalkContinue
  154. }
  155. if "" == n.ID {
  156. needOverwrite = true
  157. n.ID = ast.NewNodeID()
  158. n.SetIALAttr("id", n.ID)
  159. return ast.WalkContinue
  160. }
  161. if !blockIDs[n.ID] {
  162. blockIDs[n.ID] = true
  163. return ast.WalkContinue
  164. }
  165. // 存在重复的块 ID
  166. if ast.NodeDocument == n.Type {
  167. // 如果是文档根节点,则重置这颗树
  168. // 这里不能在迭代中重置,因为如果这个文档存在子文档的话,重置时会重命名子文档文件夹,后续迭代可能会导致子文档 ID 重复
  169. duplicatedTrees = append(duplicatedTrees, tree)
  170. return ast.WalkStop
  171. }
  172. // 其他情况,重置节点 ID
  173. needOverwrite = true
  174. n.ID = ast.NewNodeID()
  175. n.SetIALAttr("id", n.ID)
  176. needRefreshUI = true
  177. return ast.WalkContinue
  178. })
  179. if needOverwrite {
  180. logging.LogWarnf("exist more than one node with the same id in tree [%s], reset it", box.ID+p)
  181. if writeErr := filesys.WriteTree(tree); nil != writeErr {
  182. logging.LogErrorf("write tree [%s] failed: %s", p, writeErr)
  183. }
  184. }
  185. return nil
  186. })
  187. for _, tree := range duplicatedTrees {
  188. absPath := filepath.Join(boxPath, tree.Path)
  189. logging.LogWarnf("exist more than one tree with the same id [%s], reset it", absPath)
  190. recreateTree(tree, absPath)
  191. needRefreshUI = true
  192. }
  193. }
  194. if needRefreshUI {
  195. util.ReloadUI()
  196. go func() {
  197. time.Sleep(time.Second * 3)
  198. util.PushMsg(Conf.Language(190), 5000)
  199. }()
  200. }
  201. }
  202. func recreateTree(tree *parse.Tree, absPath string) {
  203. // 删除关于该树的所有块树数据,后面会调用 fixBlockTreeByFileSys() 进行订正补全
  204. treenode.RemoveBlockTreesByPathPrefix(strings.TrimSuffix(tree.Path, ".sy"))
  205. treenode.RemoveBlockTreesByRootID(tree.ID)
  206. resetTree(tree, "")
  207. if err := filesys.WriteTree(tree); nil != err {
  208. logging.LogWarnf("write tree [%s] failed: %s", tree.Path, err)
  209. return
  210. }
  211. if gulu.File.IsDir(strings.TrimSuffix(absPath, ".sy")) {
  212. // 重命名子文档文件夹
  213. from := strings.TrimSuffix(absPath, ".sy")
  214. to := filepath.Join(filepath.Dir(absPath), tree.ID)
  215. if renameErr := os.Rename(from, to); nil != renameErr {
  216. logging.LogWarnf("rename [%s] failed: %s", from, renameErr)
  217. return
  218. }
  219. }
  220. if err := os.RemoveAll(absPath); nil != err {
  221. logging.LogWarnf("remove [%s] failed: %s", absPath, err)
  222. return
  223. }
  224. }
  225. // fixBlockTreeByFileSys 通过文件系统订正块树。
  226. func fixBlockTreeByFileSys() {
  227. defer logging.Recover()
  228. autoFixLock.Lock()
  229. defer autoFixLock.Unlock()
  230. util.PushStatusBar(Conf.Language(58))
  231. boxes := Conf.GetOpenedBoxes()
  232. luteEngine := lute.New()
  233. for _, box := range boxes {
  234. boxPath := filepath.Join(util.DataDir, box.ID)
  235. var paths []string
  236. filepath.Walk(boxPath, func(path string, info os.FileInfo, err error) error {
  237. if boxPath == path {
  238. // 跳过根路径(笔记本文件夹)
  239. return nil
  240. }
  241. if info.IsDir() {
  242. if strings.HasPrefix(info.Name(), ".") {
  243. return filepath.SkipDir
  244. }
  245. return nil
  246. }
  247. if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
  248. return nil
  249. }
  250. p := path[len(boxPath):]
  251. p = filepath.ToSlash(p)
  252. paths = append(paths, p)
  253. return nil
  254. })
  255. size := len(paths)
  256. // 清理块树中的冗余数据
  257. treenode.ClearRedundantBlockTrees(box.ID, paths)
  258. // 重新索引缺失的块树
  259. missingPaths := treenode.GetNotExistPaths(box.ID, paths)
  260. for i, p := range missingPaths {
  261. id := path.Base(p)
  262. id = strings.TrimSuffix(id, ".sy")
  263. if !ast.IsNodeIDPattern(id) {
  264. continue
  265. }
  266. reindexTreeByPath(box.ID, p, i, size, luteEngine)
  267. if util.IsExiting {
  268. break
  269. }
  270. }
  271. if util.IsExiting {
  272. break
  273. }
  274. }
  275. // 清理已关闭的笔记本块树
  276. boxes = Conf.GetClosedBoxes()
  277. for _, box := range boxes {
  278. treenode.RemoveBlockTreesByBoxID(box.ID)
  279. }
  280. }
  281. // fixDatabaseIndexByBlockTree 通过块树订正数据库索引。
  282. func fixDatabaseIndexByBlockTree() {
  283. defer logging.Recover()
  284. util.PushStatusBar(Conf.Language(58))
  285. rootUpdatedMap := treenode.GetRootUpdated()
  286. dbRootUpdatedMap, err := sql.GetRootUpdated()
  287. if nil == err {
  288. reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap)
  289. }
  290. }
  291. func reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap map[string]string) {
  292. i := -1
  293. size := len(rootUpdatedMap)
  294. luteEngine := util.NewLute()
  295. for rootID, updated := range rootUpdatedMap {
  296. i++
  297. if util.IsExiting {
  298. break
  299. }
  300. rootUpdated := dbRootUpdatedMap[rootID]
  301. if "" == rootUpdated {
  302. //logging.LogWarnf("not found tree [%s] in database, reindex it", rootID)
  303. reindexTree(rootID, i, size, luteEngine)
  304. continue
  305. }
  306. if "" == updated {
  307. // BlockTree 迁移,v2.6.3 之前没有 updated 字段
  308. reindexTree(rootID, i, size, luteEngine)
  309. continue
  310. }
  311. btUpdated, _ := time.Parse("20060102150405", updated)
  312. dbUpdated, _ := time.Parse("20060102150405", rootUpdated)
  313. if dbUpdated.Before(btUpdated.Add(-10 * time.Minute)) {
  314. logging.LogWarnf("tree [%s] is not up to date, reindex it", rootID)
  315. reindexTree(rootID, i, size, luteEngine)
  316. continue
  317. }
  318. if util.IsExiting {
  319. break
  320. }
  321. }
  322. var rootIDs []string
  323. for rootID, _ := range dbRootUpdatedMap {
  324. if _, ok := rootUpdatedMap[rootID]; !ok {
  325. rootIDs = append(rootIDs, rootID)
  326. }
  327. if util.IsExiting {
  328. break
  329. }
  330. }
  331. rootIDs = gulu.Str.RemoveDuplicatedElem(rootIDs)
  332. roots := map[string]*sql.Block{}
  333. blocks := sql.GetBlocks(rootIDs)
  334. for _, block := range blocks {
  335. roots[block.RootID] = block
  336. }
  337. var toRemoveRootIDs []string
  338. for id, root := range roots {
  339. if nil == root {
  340. continue
  341. }
  342. toRemoveRootIDs = append(toRemoveRootIDs, id)
  343. if util.IsExiting {
  344. break
  345. }
  346. }
  347. toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
  348. //logging.LogWarnf("tree [%s] is not in block tree, remove it from [%s]", id, root.Box)
  349. sql.BatchRemoveTreeQueue(toRemoveRootIDs)
  350. }
  351. func reindexTreeByPath(box, p string, i, size int, luteEngine *lute.Lute) {
  352. tree, err := filesys.LoadTree(box, p, luteEngine)
  353. if nil != err {
  354. return
  355. }
  356. reindexTree0(tree, i, size)
  357. }
  358. func reindexTree(rootID string, i, size int, luteEngine *lute.Lute) {
  359. root := treenode.GetBlockTree(rootID)
  360. if nil == root {
  361. logging.LogWarnf("root block [%s] not found", rootID)
  362. return
  363. }
  364. tree, err := filesys.LoadTree(root.BoxID, root.Path, luteEngine)
  365. if nil != err {
  366. if os.IsNotExist(err) {
  367. // 文件系统上没有找到该 .sy 文件,则订正块树
  368. treenode.RemoveBlockTreesByRootID(rootID)
  369. }
  370. return
  371. }
  372. reindexTree0(tree, i, size)
  373. }
  374. func reindexTree0(tree *parse.Tree, i, size int) {
  375. updated := tree.Root.IALAttr("updated")
  376. if "" == updated {
  377. updated = util.TimeFromID(tree.Root.ID)
  378. tree.Root.SetIALAttr("updated", updated)
  379. indexWriteJSONQueue(tree)
  380. } else {
  381. treenode.IndexBlockTree(tree)
  382. sql.IndexTreeQueue(tree.Box, tree.Path)
  383. }
  384. if 0 == i%64 {
  385. util.PushStatusBar(fmt.Sprintf(Conf.Language(183), i, size, html.EscapeString(path.Base(tree.HPath))))
  386. }
  387. }