json_parser.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package filesys
  17. import (
  18. "bytes"
  19. "strings"
  20. "github.com/88250/gulu"
  21. "github.com/88250/lute/ast"
  22. "github.com/88250/lute/editor"
  23. "github.com/88250/lute/parse"
  24. "github.com/siyuan-note/siyuan/kernel/treenode"
  25. )
  26. func ParseJSONWithoutFix(jsonData []byte, options *parse.Options) (ret *parse.Tree, err error) {
  27. root := &ast.Node{}
  28. err = unmarshalJSON(jsonData, root)
  29. if nil != err {
  30. return
  31. }
  32. ret = &parse.Tree{Name: "", ID: root.ID, Root: &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}, Context: &parse.Context{ParseOption: options}}
  33. ret.Root.KramdownIAL = parse.Map2IAL(root.Properties)
  34. ret.Root.SetIALAttr("type", "doc")
  35. ret.Context.Tip = ret.Root
  36. if nil == root.Children {
  37. return
  38. }
  39. idMap := map[string]bool{}
  40. for _, child := range root.Children {
  41. genTreeByJSON(child, ret, &idMap, nil, nil, true)
  42. }
  43. return
  44. }
  45. func ParseJSON(jsonData []byte, options *parse.Options) (ret *parse.Tree, needFix bool, err error) {
  46. root := &ast.Node{}
  47. err = unmarshalJSON(jsonData, root)
  48. if nil != err {
  49. return
  50. }
  51. ret = &parse.Tree{Name: "", ID: root.ID, Root: &ast.Node{Type: ast.NodeDocument, ID: root.ID, Spec: root.Spec}, Context: &parse.Context{ParseOption: options}}
  52. ret.Root.KramdownIAL = parse.Map2IAL(root.Properties)
  53. ret.Root.SetIALAttr("type", "doc")
  54. for _, kv := range ret.Root.KramdownIAL {
  55. if strings.Contains(kv[1], "\n") {
  56. val := kv[1]
  57. val = strings.ReplaceAll(val, "\n", editor.IALValEscNewLine)
  58. ret.Root.SetIALAttr(kv[0], val)
  59. needFix = true
  60. }
  61. }
  62. ret.Context.Tip = ret.Root
  63. if nil == root.Children {
  64. newPara := &ast.Node{Type: ast.NodeParagraph, ID: ast.NewNodeID()}
  65. newPara.SetIALAttr("id", newPara.ID)
  66. ret.Root.AppendChild(newPara)
  67. needFix = true
  68. return
  69. }
  70. needMigrate2Spec1 := false
  71. idMap := map[string]bool{}
  72. for _, child := range root.Children {
  73. genTreeByJSON(child, ret, &idMap, &needFix, &needMigrate2Spec1, false)
  74. }
  75. if nil == ret.Root.FirstChild {
  76. // 如果是空文档的话挂一个空段落上去
  77. newP := treenode.NewParagraph()
  78. ret.Root.AppendChild(newP)
  79. ret.Root.SetIALAttr("updated", newP.ID[:14])
  80. }
  81. if needMigrate2Spec1 {
  82. parse.NestedInlines2FlattedSpans(ret, false)
  83. needFix = true
  84. }
  85. return
  86. }
  87. func genTreeByJSON(node *ast.Node, tree *parse.Tree, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool, ignoreFix bool) {
  88. node.Tokens, node.Type = gulu.Str.ToBytes(node.Data), ast.Str2NodeType(node.TypeStr)
  89. node.Data, node.TypeStr = "", ""
  90. node.KramdownIAL = parse.Map2IAL(node.Properties)
  91. node.Properties = nil
  92. if !ignoreFix {
  93. // 历史数据订正
  94. if -1 == node.Type {
  95. *needFix = true
  96. node.Type = ast.NodeParagraph
  97. node.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: node.Tokens})
  98. node.Children = nil
  99. }
  100. if node.IsBlock() {
  101. if node.ID != node.IALAttr("id") {
  102. //某些情况下会导致 ID 和属性 id 不相同 https://ld246.com/article/1722826829447
  103. *needFix = true
  104. node.SetIALAttr("id", node.ID)
  105. }
  106. }
  107. switch node.Type {
  108. case ast.NodeList:
  109. if 1 > len(node.Children) {
  110. *needFix = true
  111. return // 忽略空列表
  112. }
  113. case ast.NodeListItem:
  114. if 1 > len(node.Children) {
  115. *needFix = true
  116. return // 忽略空列表项
  117. }
  118. case ast.NodeBlockquote:
  119. if 2 > len(node.Children) {
  120. *needFix = true
  121. return // 忽略空引述
  122. }
  123. case ast.NodeSuperBlock:
  124. if 4 > len(node.Children) {
  125. *needFix = true
  126. return // 忽略空超级块
  127. }
  128. case ast.NodeMathBlock:
  129. if 1 > len(node.Children) {
  130. *needFix = true
  131. return // 忽略空公式
  132. }
  133. case ast.NodeBlockQueryEmbed:
  134. if 1 > len(node.Children) {
  135. *needFix = true
  136. return // 忽略空查询嵌入块
  137. }
  138. case ast.NodeCodeBlock:
  139. if 4 > len(node.Children) {
  140. // https://ld246.com/article/1713689223067
  141. existCode := false
  142. for _, child := range node.Children {
  143. if ast.NodeCodeBlockCode.String() == child.TypeStr {
  144. existCode = true
  145. break
  146. }
  147. }
  148. if !existCode {
  149. *needFix = true
  150. return // 忽略空代码块
  151. }
  152. }
  153. }
  154. fixLegacyData(tree.Context.Tip, node, idMap, needFix, needMigrate2Spec1)
  155. }
  156. tree.Context.Tip.AppendChild(node)
  157. tree.Context.Tip = node
  158. defer tree.Context.ParentTip()
  159. if nil == node.Children {
  160. return
  161. }
  162. for _, child := range node.Children {
  163. genTreeByJSON(child, tree, idMap, needFix, needMigrate2Spec1, ignoreFix)
  164. }
  165. node.Children = nil
  166. }
  167. func fixLegacyData(tip, node *ast.Node, idMap *map[string]bool, needFix, needMigrate2Spec1 *bool) {
  168. if node.IsBlock() {
  169. if "" == node.ID {
  170. node.ID = ast.NewNodeID()
  171. node.SetIALAttr("id", node.ID)
  172. *needFix = true
  173. }
  174. if 0 < len(node.Children) && ast.NodeBr.String() == node.Children[len(node.Children)-1].TypeStr {
  175. // 剔除块尾多余的软换行 https://github.com/siyuan-note/siyuan/issues/6191
  176. node.Children = node.Children[:len(node.Children)-1]
  177. *needFix = true
  178. }
  179. for _, kv := range node.KramdownIAL {
  180. if strings.Contains(kv[0], "custom-av-key-") {
  181. // TODO: 数据库正式上线以后移除这里的修复
  182. // 删除数据库属性键值对 https://github.com/siyuan-note/siyuan/issues/9293
  183. node.RemoveIALAttr(kv[0])
  184. *needFix = true
  185. }
  186. }
  187. }
  188. if "" != node.ID {
  189. if _, ok := (*idMap)[node.ID]; ok {
  190. node.ID = ast.NewNodeID()
  191. node.SetIALAttr("id", node.ID)
  192. *needFix = true
  193. }
  194. (*idMap)[node.ID] = true
  195. }
  196. switch node.Type {
  197. case ast.NodeIFrame:
  198. if bytes.Contains(node.Tokens, gulu.Str.ToBytes("iframe-content")) {
  199. start := bytes.Index(node.Tokens, gulu.Str.ToBytes("<iframe"))
  200. end := bytes.Index(node.Tokens, gulu.Str.ToBytes("</iframe>"))
  201. node.Tokens = node.Tokens[start : end+9]
  202. *needFix = true
  203. }
  204. case ast.NodeWidget:
  205. if bytes.Contains(node.Tokens, gulu.Str.ToBytes("http://127.0.0.1:6806")) {
  206. node.Tokens = bytes.ReplaceAll(node.Tokens, []byte("http://127.0.0.1:6806"), nil)
  207. *needFix = true
  208. }
  209. case ast.NodeList:
  210. if nil != node.ListData && 3 != node.ListData.Typ && 0 < len(node.Children) &&
  211. nil != node.Children[0].ListData && 3 == node.Children[0].ListData.Typ {
  212. node.ListData.Typ = 3
  213. *needFix = true
  214. }
  215. case ast.NodeMark:
  216. if 3 == len(node.Children) && "NodeText" == node.Children[1].TypeStr {
  217. if strings.HasPrefix(node.Children[1].Data, " ") || strings.HasSuffix(node.Children[1].Data, " ") {
  218. node.Children[1].Data = strings.TrimSpace(node.Children[1].Data)
  219. *needFix = true
  220. }
  221. }
  222. case ast.NodeHeading:
  223. if 6 < node.HeadingLevel {
  224. node.HeadingLevel = 6
  225. *needFix = true
  226. }
  227. case ast.NodeLinkDest:
  228. if bytes.HasPrefix(node.Tokens, []byte("assets/")) && bytes.HasSuffix(node.Tokens, []byte(" ")) {
  229. node.Tokens = bytes.TrimSpace(node.Tokens)
  230. *needFix = true
  231. }
  232. case ast.NodeText:
  233. if nil != tip.LastChild && ast.NodeTagOpenMarker == tip.LastChild.Type && 1 > len(node.Tokens) {
  234. node.Tokens = []byte("Untitled")
  235. *needFix = true
  236. }
  237. case ast.NodeTagCloseMarker:
  238. if nil != tip.LastChild {
  239. if ast.NodeTagOpenMarker == tip.LastChild.Type {
  240. tip.AppendChild(&ast.Node{Type: ast.NodeText, Tokens: []byte("Untitled")})
  241. *needFix = true
  242. } else if "" == tip.LastChild.Text() {
  243. tip.LastChild.Type = ast.NodeText
  244. tip.LastChild.Tokens = []byte("Untitled")
  245. *needFix = true
  246. }
  247. }
  248. case ast.NodeBlockRef:
  249. // 建立索引时无法解析 `v2.2.0-` 版本的块引用 https://github.com/siyuan-note/siyuan/issues/6889
  250. // 早先的迁移程序有缺陷,漏迁移了块引用节点,这里检测到块引用节点后标识需要迁移
  251. *needMigrate2Spec1 = true
  252. case ast.NodeInlineHTML:
  253. *needFix = true
  254. node.Type = ast.NodeHTMLBlock
  255. }
  256. for _, kv := range node.KramdownIAL {
  257. if strings.Contains(kv[1], "\n") {
  258. val := kv[1]
  259. val = strings.ReplaceAll(val, "\n", editor.IALValEscNewLine)
  260. node.SetIALAttr(kv[0], val)
  261. *needFix = true
  262. }
  263. }
  264. }