import.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097
  1. // SiYuan - Refactor your thinking
  2. // Copyright (c) 2020-present, b3log.org
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package model
  17. import (
  18. "bytes"
  19. "encoding/base64"
  20. "encoding/json"
  21. "errors"
  22. "fmt"
  23. "image"
  24. "image/jpeg"
  25. "image/png"
  26. "io"
  27. "io/fs"
  28. "math/rand"
  29. "os"
  30. "path"
  31. "path/filepath"
  32. "runtime/debug"
  33. "sort"
  34. "strconv"
  35. "strings"
  36. "time"
  37. "github.com/88250/gulu"
  38. "github.com/88250/lute/ast"
  39. "github.com/88250/lute/html"
  40. "github.com/88250/lute/html/atom"
  41. "github.com/88250/lute/parse"
  42. "github.com/88250/lute/render"
  43. "github.com/siyuan-note/filelock"
  44. "github.com/siyuan-note/logging"
  45. "github.com/siyuan-note/siyuan/kernel/filesys"
  46. "github.com/siyuan-note/siyuan/kernel/sql"
  47. "github.com/siyuan-note/siyuan/kernel/treenode"
  48. "github.com/siyuan-note/siyuan/kernel/util"
  49. )
  50. func HTML2Markdown(htmlStr string) (markdown string, err error) {
  51. assetDirPath := filepath.Join(util.DataDir, "assets")
  52. luteEngine := util.NewLute()
  53. tree := luteEngine.HTML2Tree(htmlStr)
  54. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  55. if !entering || ast.NodeLinkDest != n.Type {
  56. return ast.WalkContinue
  57. }
  58. dest := n.TokensStr()
  59. if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
  60. processBase64Img(n, dest, assetDirPath, err)
  61. return ast.WalkContinue
  62. }
  63. return ast.WalkContinue
  64. })
  65. var formatted []byte
  66. renderer := render.NewFormatRenderer(tree, luteEngine.RenderOptions)
  67. for nodeType, rendererFunc := range luteEngine.HTML2MdRendererFuncs {
  68. renderer.ExtRendererFuncs[nodeType] = rendererFunc
  69. }
  70. formatted = renderer.Render()
  71. markdown = gulu.Str.FromBytes(formatted)
  72. return
  73. }
  74. func ImportSY(zipPath, boxID, toPath string) (err error) {
  75. util.PushEndlessProgress(Conf.Language(73))
  76. defer util.ClearPushProgress(100)
  77. syncLock.Lock()
  78. defer syncLock.Unlock()
  79. baseName := filepath.Base(zipPath)
  80. ext := filepath.Ext(baseName)
  81. baseName = strings.TrimSuffix(baseName, ext)
  82. unzipPath := filepath.Join(filepath.Dir(zipPath), baseName+"-"+gulu.Rand.String(7))
  83. err = gulu.Zip.Unzip(zipPath, unzipPath)
  84. if nil != err {
  85. return
  86. }
  87. defer os.RemoveAll(unzipPath)
  88. var syPaths []string
  89. filepath.Walk(unzipPath, func(path string, info fs.FileInfo, err error) error {
  90. if nil != err {
  91. return err
  92. }
  93. if !info.IsDir() && strings.HasSuffix(info.Name(), ".sy") {
  94. syPaths = append(syPaths, path)
  95. }
  96. return nil
  97. })
  98. unzipRootPaths, err := filepath.Glob(unzipPath + "/*")
  99. if nil != err {
  100. return
  101. }
  102. if 1 != len(unzipRootPaths) {
  103. logging.LogErrorf("invalid .sy.zip")
  104. return errors.New(Conf.Language(199))
  105. }
  106. unzipRootPath := unzipRootPaths[0]
  107. name := filepath.Base(unzipRootPath)
  108. if strings.HasPrefix(name, "data-20") && len("data-20230321175442") == len(name) {
  109. return errors.New(Conf.Language(199))
  110. }
  111. luteEngine := util.NewLute()
  112. blockIDs := map[string]string{}
  113. trees := map[string]*parse.Tree{}
  114. // 重新生成块 ID
  115. for _, syPath := range syPaths {
  116. data, readErr := os.ReadFile(syPath)
  117. if nil != readErr {
  118. logging.LogErrorf("read .sy [%s] failed: %s", syPath, readErr)
  119. err = readErr
  120. return
  121. }
  122. tree, _, parseErr := filesys.ParseJSON(data, luteEngine.ParseOptions)
  123. if nil != parseErr {
  124. logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr)
  125. err = parseErr
  126. return
  127. }
  128. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  129. if !entering {
  130. return ast.WalkContinue
  131. }
  132. if "" != n.ID {
  133. newNodeID := ast.NewNodeID()
  134. blockIDs[n.ID] = newNodeID
  135. n.ID = newNodeID
  136. n.SetIALAttr("id", newNodeID)
  137. }
  138. return ast.WalkContinue
  139. })
  140. tree.ID = tree.Root.ID
  141. tree.Path = filepath.ToSlash(strings.TrimPrefix(syPath, unzipRootPath))
  142. trees[tree.ID] = tree
  143. }
  144. // 引用和嵌入指向重新生成的块 ID
  145. for _, tree := range trees {
  146. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  147. if !entering {
  148. return ast.WalkContinue
  149. }
  150. if treenode.IsBlockRef(n) {
  151. defID, _, _ := treenode.GetBlockRef(n)
  152. newDefID := blockIDs[defID]
  153. if "" != newDefID {
  154. n.TextMarkBlockRefID = newDefID
  155. }
  156. } else if ast.NodeBlockQueryEmbedScript == n.Type {
  157. for oldID, newID := range blockIDs {
  158. // 导入 `.sy.zip` 后查询嵌入块失效 https://github.com/siyuan-note/siyuan/issues/5316
  159. n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(oldID), []byte(newID))
  160. }
  161. }
  162. return ast.WalkContinue
  163. })
  164. }
  165. // 写回 .sy
  166. for _, tree := range trees {
  167. syPath := filepath.Join(unzipRootPath, tree.Path)
  168. if "" == tree.Root.Spec {
  169. parse.NestedInlines2FlattedSpans(tree)
  170. tree.Root.Spec = "1"
  171. }
  172. renderer := render.NewJSONRenderer(tree, luteEngine.RenderOptions)
  173. data := renderer.Render()
  174. if !util.UseSingleLineSave {
  175. buf := bytes.Buffer{}
  176. buf.Grow(1024 * 1024 * 2)
  177. if err = json.Indent(&buf, data, "", "\t"); nil != err {
  178. return
  179. }
  180. data = buf.Bytes()
  181. }
  182. if err = os.WriteFile(syPath, data, 0644); nil != err {
  183. logging.LogErrorf("write .sy [%s] failed: %s", syPath, err)
  184. return
  185. }
  186. newSyPath := filepath.Join(filepath.Dir(syPath), tree.ID+".sy")
  187. if err = filelock.Move(syPath, newSyPath); nil != err {
  188. logging.LogErrorf("rename .sy from [%s] to [%s] failed: %s", syPath, newSyPath, err)
  189. return
  190. }
  191. }
  192. // 合并 sort.json
  193. fullSortIDs := map[string]int{}
  194. sortIDs := map[string]int{}
  195. var sortData []byte
  196. var sortErr error
  197. sortPath := filepath.Join(unzipRootPath, ".siyuan", "sort.json")
  198. if gulu.File.IsExist(sortPath) {
  199. sortData, sortErr = filelock.ReadFile(sortPath)
  200. if nil != sortErr {
  201. logging.LogErrorf("read import sort conf failed: %s", sortErr)
  202. }
  203. if sortErr = gulu.JSON.UnmarshalJSON(sortData, &sortIDs); nil != sortErr {
  204. logging.LogErrorf("unmarshal sort conf failed: %s", sortErr)
  205. }
  206. boxSortPath := filepath.Join(util.DataDir, boxID, ".siyuan", "sort.json")
  207. if gulu.File.IsExist(boxSortPath) {
  208. sortData, sortErr = filelock.ReadFile(boxSortPath)
  209. if nil != sortErr {
  210. logging.LogErrorf("read box sort conf failed: %s", sortErr)
  211. }
  212. if sortErr = gulu.JSON.UnmarshalJSON(sortData, &fullSortIDs); nil != sortErr {
  213. logging.LogErrorf("unmarshal box sort conf failed: %s", sortErr)
  214. }
  215. }
  216. for oldID, sort := range sortIDs {
  217. if newID := blockIDs[oldID]; "" != newID {
  218. fullSortIDs[newID] = sort
  219. }
  220. }
  221. sortData, sortErr = gulu.JSON.MarshalJSON(fullSortIDs)
  222. if nil != sortErr {
  223. logging.LogErrorf("marshal box full sort conf failed: %s", sortErr)
  224. } else {
  225. sortErr = filelock.WriteFile(boxSortPath, sortData)
  226. if nil != sortErr {
  227. logging.LogErrorf("write box full sort conf failed: %s", sortErr)
  228. }
  229. }
  230. if removeErr := os.RemoveAll(sortPath); nil != removeErr {
  231. logging.LogErrorf("remove temp sort conf failed: %s", removeErr)
  232. }
  233. }
  234. // 重命名文件路径
  235. renamePaths := map[string]string{}
  236. filepath.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
  237. if nil != err {
  238. return err
  239. }
  240. if info.IsDir() && ast.IsNodeIDPattern(info.Name()) {
  241. renamePaths[path] = path
  242. }
  243. return nil
  244. })
  245. for p, _ := range renamePaths {
  246. originalPath := p
  247. p = strings.TrimPrefix(p, unzipRootPath)
  248. p = filepath.ToSlash(p)
  249. parts := strings.Split(p, "/")
  250. buf := bytes.Buffer{}
  251. buf.WriteString("/")
  252. for i, part := range parts {
  253. if "" == part {
  254. continue
  255. }
  256. newNodeID := blockIDs[part]
  257. if "" != newNodeID {
  258. buf.WriteString(newNodeID)
  259. } else {
  260. buf.WriteString(part)
  261. }
  262. if i < len(parts)-1 {
  263. buf.WriteString("/")
  264. }
  265. }
  266. newPath := buf.String()
  267. renamePaths[originalPath] = filepath.Join(unzipRootPath, newPath)
  268. }
  269. var oldPaths []string
  270. for oldPath, _ := range renamePaths {
  271. oldPaths = append(oldPaths, oldPath)
  272. }
  273. sort.Slice(oldPaths, func(i, j int) bool {
  274. return strings.Count(oldPaths[i], string(os.PathSeparator)) < strings.Count(oldPaths[j], string(os.PathSeparator))
  275. })
  276. for i, oldPath := range oldPaths {
  277. newPath := renamePaths[oldPath]
  278. if err = filelock.Move(oldPath, newPath); nil != err {
  279. logging.LogErrorf("rename path from [%s] to [%s] failed: %s", oldPath, renamePaths[oldPath], err)
  280. return errors.New("rename path failed")
  281. }
  282. delete(renamePaths, oldPath)
  283. var toRemoves []string
  284. newRenamedPaths := map[string]string{}
  285. for oldP, newP := range renamePaths {
  286. if strings.HasPrefix(oldP, oldPath) {
  287. renamedOldP := strings.Replace(oldP, oldPath, newPath, 1)
  288. newRenamedPaths[renamedOldP] = newP
  289. toRemoves = append(toRemoves, oldPath)
  290. }
  291. }
  292. for _, toRemove := range toRemoves {
  293. delete(renamePaths, toRemove)
  294. }
  295. for oldP, newP := range newRenamedPaths {
  296. renamePaths[oldP] = newP
  297. }
  298. for j := i + 1; j < len(oldPaths); j++ {
  299. if strings.HasPrefix(oldPaths[j], oldPath) {
  300. renamedOldP := strings.Replace(oldPaths[j], oldPath, newPath, 1)
  301. oldPaths[j] = renamedOldP
  302. }
  303. }
  304. }
  305. // 将包含的资源文件统一移动到 data/assets/ 下
  306. var assetsDirs []string
  307. filepath.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
  308. if strings.Contains(path, "assets") && info.IsDir() {
  309. assetsDirs = append(assetsDirs, path)
  310. }
  311. return nil
  312. })
  313. dataAssets := filepath.Join(util.DataDir, "assets")
  314. for _, assets := range assetsDirs {
  315. if gulu.File.IsDir(assets) {
  316. if err = filelock.Copy(assets, dataAssets); nil != err {
  317. logging.LogErrorf("copy assets from [%s] to [%s] failed: %s", assets, dataAssets, err)
  318. return
  319. }
  320. }
  321. os.RemoveAll(assets)
  322. }
  323. var baseTargetPath string
  324. if "/" == toPath {
  325. baseTargetPath = "/"
  326. } else {
  327. block := treenode.GetBlockTreeRootByPath(boxID, toPath)
  328. if nil == block {
  329. logging.LogErrorf("not found block by path [%s]", toPath)
  330. return nil
  331. }
  332. baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
  333. }
  334. targetDir := filepath.Join(util.DataDir, boxID, baseTargetPath)
  335. if err = os.MkdirAll(targetDir, 0755); nil != err {
  336. return
  337. }
  338. var treePaths []string
  339. filepath.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
  340. if info.IsDir() {
  341. if strings.HasPrefix(info.Name(), ".") {
  342. return filepath.SkipDir
  343. }
  344. return nil
  345. }
  346. if !strings.HasSuffix(info.Name(), ".sy") {
  347. return nil
  348. }
  349. p := strings.TrimPrefix(path, unzipRootPath)
  350. p = filepath.ToSlash(p)
  351. treePaths = append(treePaths, p)
  352. return nil
  353. })
  354. if err = filelock.Copy(unzipRootPath, targetDir); nil != err {
  355. logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", unzipRootPath, util.DataDir, err)
  356. err = errors.New("copy data failed")
  357. return
  358. }
  359. boxAbsPath := filepath.Join(util.DataDir, boxID)
  360. for _, treePath := range treePaths {
  361. absPath := filepath.Join(targetDir, treePath)
  362. p := strings.TrimPrefix(absPath, boxAbsPath)
  363. p = filepath.ToSlash(p)
  364. tree, err := filesys.LoadTree(boxID, p, luteEngine)
  365. if nil != err {
  366. logging.LogErrorf("load tree [%s] failed: %s", treePath, err)
  367. continue
  368. }
  369. treenode.IndexBlockTree(tree)
  370. sql.UpsertTreeQueue(tree)
  371. }
  372. IncSync()
  373. return
  374. }
  375. func ImportData(zipPath string) (err error) {
  376. util.PushEndlessProgress(Conf.Language(73))
  377. defer util.ClearPushProgress(100)
  378. syncLock.Lock()
  379. defer syncLock.Unlock()
  380. baseName := filepath.Base(zipPath)
  381. ext := filepath.Ext(baseName)
  382. baseName = strings.TrimSuffix(baseName, ext)
  383. unzipPath := filepath.Join(filepath.Dir(zipPath), baseName)
  384. err = gulu.Zip.Unzip(zipPath, unzipPath)
  385. if nil != err {
  386. return
  387. }
  388. defer os.RemoveAll(unzipPath)
  389. files, err := filepath.Glob(filepath.Join(unzipPath, "*/*.sy"))
  390. if nil != err {
  391. logging.LogErrorf("check data.zip failed: %s", err)
  392. return errors.New("check data.zip failed")
  393. }
  394. if 0 < len(files) {
  395. return errors.New(Conf.Language(198))
  396. }
  397. dirs, err := os.ReadDir(unzipPath)
  398. if nil != err {
  399. logging.LogErrorf("check data.zip failed: %s", err)
  400. return errors.New("check data.zip failed")
  401. }
  402. if 1 != len(dirs) {
  403. return errors.New(Conf.Language(198))
  404. }
  405. tmpDataPath := filepath.Join(unzipPath, dirs[0].Name())
  406. if err = filelock.Copy(tmpDataPath, util.DataDir); nil != err {
  407. logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", tmpDataPath, util.DataDir, err)
  408. err = errors.New("copy data failed")
  409. return
  410. }
  411. IncSync()
  412. FullReindex()
  413. return
  414. }
  415. func ImportFromLocalPath(boxID, localPath string, toPath string) (err error) {
  416. util.PushEndlessProgress(Conf.Language(73))
  417. defer func() {
  418. util.PushClearProgress()
  419. if e := recover(); nil != e {
  420. stack := debug.Stack()
  421. msg := fmt.Sprintf("PANIC RECOVERED: %v\n\t%s\n", e, stack)
  422. logging.LogErrorf("import from local path failed: %s", msg)
  423. err = errors.New("import from local path failed, please check kernel log for details")
  424. }
  425. }()
  426. WaitForWritingFiles()
  427. var baseHPath, baseTargetPath, boxLocalPath string
  428. if "/" == toPath {
  429. baseHPath = "/"
  430. baseTargetPath = "/"
  431. } else {
  432. block := treenode.GetBlockTreeRootByPath(boxID, toPath)
  433. if nil == block {
  434. logging.LogErrorf("not found block by path [%s]", toPath)
  435. return nil
  436. }
  437. baseHPath = block.HPath
  438. baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
  439. }
  440. boxLocalPath = filepath.Join(util.DataDir, boxID)
  441. if gulu.File.IsDir(localPath) {
  442. // 收集所有资源文件
  443. assets := map[string]string{}
  444. filepath.Walk(localPath, func(currentPath string, info os.FileInfo, walkErr error) error {
  445. if localPath == currentPath {
  446. return nil
  447. }
  448. if strings.HasPrefix(info.Name(), ".") {
  449. if info.IsDir() {
  450. return filepath.SkipDir
  451. }
  452. return nil
  453. }
  454. if !strings.HasSuffix(info.Name(), ".md") && !strings.HasSuffix(info.Name(), ".markdown") {
  455. assets[currentPath] = currentPath
  456. return nil
  457. }
  458. return nil
  459. })
  460. targetPaths := map[string]string{}
  461. assetsDone := map[string]string{}
  462. // md 转换 sy
  463. filepath.Walk(localPath, func(currentPath string, info os.FileInfo, walkErr error) error {
  464. if strings.HasPrefix(info.Name(), ".") {
  465. if info.IsDir() {
  466. return filepath.SkipDir
  467. }
  468. return nil
  469. }
  470. var tree *parse.Tree
  471. var ext string
  472. title := info.Name()
  473. if !info.IsDir() {
  474. ext = path.Ext(info.Name())
  475. title = strings.TrimSuffix(info.Name(), ext)
  476. }
  477. id := ast.NewNodeID()
  478. curRelPath := filepath.ToSlash(strings.TrimPrefix(currentPath, localPath))
  479. targetPath := path.Join(baseTargetPath, id)
  480. hPath := path.Join(baseHPath, filepath.ToSlash(strings.TrimPrefix(currentPath, localPath)))
  481. hPath = strings.TrimSuffix(hPath, ext)
  482. if "" == curRelPath {
  483. curRelPath = "/"
  484. hPath = "/" + title
  485. } else {
  486. dirPath := targetPaths[path.Dir(curRelPath)]
  487. targetPath = path.Join(dirPath, id)
  488. }
  489. targetPath = strings.ReplaceAll(targetPath, ".sy/", "/")
  490. targetPath += ".sy"
  491. targetPaths[curRelPath] = targetPath
  492. if info.IsDir() {
  493. tree = treenode.NewTree(boxID, targetPath, hPath, title)
  494. importTrees = append(importTrees, tree)
  495. return nil
  496. }
  497. if !strings.HasSuffix(info.Name(), ".md") && !strings.HasSuffix(info.Name(), ".markdown") {
  498. return nil
  499. }
  500. data, readErr := os.ReadFile(currentPath)
  501. if nil != readErr {
  502. err = readErr
  503. return io.EOF
  504. }
  505. tree = parseStdMd(data)
  506. if nil == tree {
  507. logging.LogErrorf("parse tree [%s] failed", currentPath)
  508. return nil
  509. }
  510. tree.ID = id
  511. tree.Root.ID = id
  512. tree.Root.SetIALAttr("id", tree.Root.ID)
  513. tree.Root.SetIALAttr("title", title)
  514. tree.Box = boxID
  515. targetPath = path.Join(path.Dir(targetPath), tree.Root.ID+".sy")
  516. tree.Path = targetPath
  517. targetPaths[curRelPath] = targetPath
  518. tree.HPath = hPath
  519. tree.Root.Spec = "1"
  520. docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
  521. assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
  522. currentDir := filepath.Dir(currentPath)
  523. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  524. if !entering || ast.NodeLinkDest != n.Type {
  525. return ast.WalkContinue
  526. }
  527. dest := n.TokensStr()
  528. if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
  529. processBase64Img(n, dest, assetDirPath, err)
  530. return ast.WalkContinue
  531. }
  532. dest = strings.ReplaceAll(dest, "%20", " ")
  533. dest = strings.ReplaceAll(dest, "%5C", "/")
  534. n.Tokens = []byte(dest)
  535. if !util.IsRelativePath(dest) {
  536. return ast.WalkContinue
  537. }
  538. dest = filepath.ToSlash(dest)
  539. if "" == dest {
  540. return ast.WalkContinue
  541. }
  542. absDest := filepath.Join(currentDir, dest)
  543. fullPath, exist := assets[absDest]
  544. if !exist {
  545. absDest = filepath.Join(currentDir, string(html.DecodeDestination([]byte(dest))))
  546. }
  547. fullPath, exist = assets[absDest]
  548. if exist {
  549. existName := assetsDone[absDest]
  550. var name string
  551. if "" == existName {
  552. name = filepath.Base(fullPath)
  553. name = util.AssetName(name)
  554. assetTargetPath := filepath.Join(assetDirPath, name)
  555. if err = filelock.Copy(fullPath, assetTargetPath); nil != err {
  556. logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", fullPath, assetTargetPath, err)
  557. return ast.WalkContinue
  558. }
  559. assetsDone[absDest] = name
  560. } else {
  561. name = existName
  562. }
  563. n.Tokens = []byte("assets/" + name)
  564. }
  565. return ast.WalkContinue
  566. })
  567. reassignIDUpdated(tree)
  568. importTrees = append(importTrees, tree)
  569. return nil
  570. })
  571. } else { // 导入单个文件
  572. fileName := filepath.Base(localPath)
  573. if !strings.HasSuffix(fileName, ".md") && !strings.HasSuffix(fileName, ".markdown") {
  574. return errors.New(Conf.Language(79))
  575. }
  576. title := strings.TrimSuffix(fileName, ".markdown")
  577. title = strings.TrimSuffix(title, ".md")
  578. targetPath := strings.TrimSuffix(toPath, ".sy")
  579. id := ast.NewNodeID()
  580. targetPath = path.Join(targetPath, id+".sy")
  581. var data []byte
  582. data, err = os.ReadFile(localPath)
  583. if nil != err {
  584. return err
  585. }
  586. tree := parseStdMd(data)
  587. if nil == tree {
  588. msg := fmt.Sprintf("parse tree [%s] failed", localPath)
  589. logging.LogErrorf(msg)
  590. return errors.New(msg)
  591. }
  592. tree.ID = id
  593. tree.Root.ID = id
  594. tree.Root.SetIALAttr("id", tree.Root.ID)
  595. tree.Root.SetIALAttr("title", title)
  596. tree.Box = boxID
  597. tree.Path = targetPath
  598. tree.HPath = path.Join(baseHPath, title)
  599. tree.Root.Spec = "1"
  600. docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
  601. assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
  602. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  603. if !entering || ast.NodeLinkDest != n.Type {
  604. return ast.WalkContinue
  605. }
  606. dest := n.TokensStr()
  607. if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
  608. processBase64Img(n, dest, assetDirPath, err)
  609. return ast.WalkContinue
  610. }
  611. dest = strings.ReplaceAll(dest, "%20", " ")
  612. dest = strings.ReplaceAll(dest, "%5C", "/")
  613. n.Tokens = []byte(dest)
  614. if !util.IsRelativePath(dest) {
  615. return ast.WalkContinue
  616. }
  617. dest = filepath.ToSlash(dest)
  618. if "" == dest {
  619. return ast.WalkContinue
  620. }
  621. absolutePath := filepath.Join(filepath.Dir(localPath), dest)
  622. exist := gulu.File.IsExist(absolutePath)
  623. if !exist {
  624. absolutePath = filepath.Join(filepath.Dir(localPath), string(html.DecodeDestination([]byte(dest))))
  625. exist = gulu.File.IsExist(absolutePath)
  626. }
  627. if exist {
  628. name := filepath.Base(absolutePath)
  629. name = util.AssetName(name)
  630. assetTargetPath := filepath.Join(assetDirPath, name)
  631. if err = filelock.Copy(absolutePath, assetTargetPath); nil != err {
  632. logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err)
  633. return ast.WalkContinue
  634. }
  635. n.Tokens = []byte("assets/" + name)
  636. }
  637. return ast.WalkContinue
  638. })
  639. reassignIDUpdated(tree)
  640. importTrees = append(importTrees, tree)
  641. }
  642. if 0 < len(importTrees) {
  643. initSearchLinks()
  644. convertWikiLinksAndTags()
  645. buildBlockRefInText()
  646. for i, tree := range importTrees {
  647. indexWriteJSONQueue(tree)
  648. if 0 == i%4 {
  649. util.PushEndlessProgress(fmt.Sprintf(Conf.Language(66), fmt.Sprintf("%d/%d ", i, len(importTrees))+tree.HPath))
  650. }
  651. }
  652. importTrees = []*parse.Tree{}
  653. searchLinks = map[string]string{}
  654. }
  655. IncSync()
  656. util.ReloadUI()
  657. debug.FreeOSMemory()
  658. return
  659. }
  660. func parseStdMd(markdown []byte) (ret *parse.Tree) {
  661. luteEngine := util.NewStdLute()
  662. ret = parse.Parse("", markdown, luteEngine.ParseOptions)
  663. if nil == ret {
  664. return
  665. }
  666. genTreeID(ret)
  667. imgHtmlBlock2InlineImg(ret)
  668. parse.NestedInlines2FlattedSpansHybrid(ret)
  669. return
  670. }
  671. func processBase64Img(n *ast.Node, dest string, assetDirPath string, err error) {
  672. base64TmpDir := filepath.Join(util.TempDir, "base64")
  673. os.MkdirAll(base64TmpDir, 0755)
  674. sep := strings.Index(dest, ";base64,")
  675. var decodeErr error
  676. unbased, decodeErr := base64.StdEncoding.DecodeString(dest[sep+8:])
  677. if nil != decodeErr {
  678. logging.LogErrorf("decode base64 image failed: %s", decodeErr)
  679. return
  680. }
  681. dataReader := bytes.NewReader(unbased)
  682. var img image.Image
  683. var ext string
  684. typ := dest[5:sep]
  685. switch typ {
  686. case "image/png":
  687. img, decodeErr = png.Decode(dataReader)
  688. ext = ".png"
  689. case "image/jpeg":
  690. img, decodeErr = jpeg.Decode(dataReader)
  691. ext = ".jpg"
  692. default:
  693. logging.LogWarnf("unsupported base64 image type [%s]", typ)
  694. return
  695. }
  696. if nil != decodeErr {
  697. logging.LogErrorf("decode base64 image failed: %s", decodeErr)
  698. return
  699. }
  700. name := "image" + ext
  701. alt := n.Parent.ChildByType(ast.NodeLinkText)
  702. if nil != alt {
  703. name = alt.TokensStr() + ext
  704. }
  705. name = util.FilterFileName(name)
  706. name = util.AssetName(name)
  707. tmp := filepath.Join(base64TmpDir, name)
  708. tmpFile, openErr := os.OpenFile(tmp, os.O_RDWR|os.O_CREATE, 0644)
  709. if nil != openErr {
  710. logging.LogErrorf("open temp file [%s] failed: %s", tmp, openErr)
  711. return
  712. }
  713. var encodeErr error
  714. switch typ {
  715. case "image/png":
  716. encodeErr = png.Encode(tmpFile, img)
  717. case "image/jpeg":
  718. encodeErr = jpeg.Encode(tmpFile, img, &jpeg.Options{Quality: 100})
  719. }
  720. if nil != encodeErr {
  721. logging.LogErrorf("encode base64 image failed: %s", encodeErr)
  722. tmpFile.Close()
  723. return
  724. }
  725. tmpFile.Close()
  726. assetTargetPath := filepath.Join(assetDirPath, name)
  727. if err = filelock.Copy(tmp, assetTargetPath); nil != err {
  728. logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", tmp, assetTargetPath, err)
  729. return
  730. }
  731. n.Tokens = []byte("assets/" + name)
  732. }
  733. func imgHtmlBlock2InlineImg(tree *parse.Tree) {
  734. imgHtmlBlocks := map[*ast.Node]*html.Node{}
  735. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  736. if !entering {
  737. return ast.WalkContinue
  738. }
  739. if ast.NodeHTMLBlock == n.Type {
  740. htmlNodes, pErr := html.ParseFragment(bytes.NewReader(n.Tokens), &html.Node{Type: html.ElementNode})
  741. if nil != pErr {
  742. logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr)
  743. return ast.WalkContinue
  744. }
  745. if 1 > len(htmlNodes) {
  746. return ast.WalkContinue
  747. }
  748. for _, htmlNode := range htmlNodes {
  749. if atom.Img == htmlNode.DataAtom {
  750. imgHtmlBlocks[n] = htmlNode
  751. break
  752. }
  753. }
  754. }
  755. return ast.WalkContinue
  756. })
  757. for n, htmlImg := range imgHtmlBlocks {
  758. src := domAttrValue(htmlImg, "src")
  759. alt := domAttrValue(htmlImg, "alt")
  760. title := domAttrValue(htmlImg, "title")
  761. p := &ast.Node{Type: ast.NodeParagraph, ID: n.ID}
  762. img := &ast.Node{Type: ast.NodeImage}
  763. p.AppendChild(img)
  764. img.AppendChild(&ast.Node{Type: ast.NodeBang})
  765. img.AppendChild(&ast.Node{Type: ast.NodeOpenBracket})
  766. img.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(alt)})
  767. img.AppendChild(&ast.Node{Type: ast.NodeCloseBracket})
  768. img.AppendChild(&ast.Node{Type: ast.NodeOpenParen})
  769. img.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(src)})
  770. if "" != title {
  771. img.AppendChild(&ast.Node{Type: ast.NodeLinkSpace})
  772. img.AppendChild(&ast.Node{Type: ast.NodeLinkTitle})
  773. }
  774. img.AppendChild(&ast.Node{Type: ast.NodeCloseParen})
  775. n.InsertBefore(p)
  776. n.Unlink()
  777. }
  778. return
  779. }
  780. func reassignIDUpdated(tree *parse.Tree) {
  781. var blockCount int
  782. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  783. if !entering || "" == n.ID {
  784. return ast.WalkContinue
  785. }
  786. blockCount++
  787. return ast.WalkContinue
  788. })
  789. ids := make([]string, blockCount)
  790. min, _ := strconv.ParseInt(time.Now().Add(-1*time.Duration(blockCount)*time.Second).Format("20060102150405"), 10, 64)
  791. for i := 0; i < blockCount; i++ {
  792. ids[i] = newID(fmt.Sprintf("%d", min))
  793. min++
  794. }
  795. var i int
  796. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  797. if !entering || "" == n.ID {
  798. return ast.WalkContinue
  799. }
  800. n.ID = ids[i]
  801. n.SetIALAttr("id", n.ID)
  802. n.SetIALAttr("updated", util.TimeFromID(n.ID))
  803. i++
  804. return ast.WalkContinue
  805. })
  806. tree.ID = tree.Root.ID
  807. tree.Path = path.Join(path.Dir(tree.Path), tree.ID+".sy")
  808. tree.Root.SetIALAttr("id", tree.Root.ID)
  809. }
  810. func newID(t string) string {
  811. return t + "-" + randStr(7)
  812. }
  // randStr returns a string of the given length whose characters are drawn
  // with rand.Intn from the lowercase letters a-z and the digits 0-9.
  func randStr(length int) string {
  	const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"

  	out := make([]byte, length)
  	for pos := 0; pos < length; pos++ {
  		// The alphabet is pure ASCII, so picking bytes is the same as picking runes.
  		out[pos] = alphabet[rand.Intn(len(alphabet))]
  	}
  	return string(out)
  }
  821. func domAttrValue(n *html.Node, attrName string) string {
  822. if nil == n {
  823. return ""
  824. }
  825. for _, attr := range n.Attr {
  826. if attr.Key == attrName {
  827. return attr.Val
  828. }
  829. }
  830. return ""
  831. }
  832. var importTrees []*parse.Tree
  833. var searchLinks = map[string]string{}
  834. func initSearchLinks() {
  835. for _, tree := range importTrees {
  836. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  837. if !entering || (ast.NodeDocument != n.Type && ast.NodeHeading != n.Type) {
  838. return ast.WalkContinue
  839. }
  840. nodePath := tree.HPath + "#"
  841. if ast.NodeHeading == n.Type {
  842. nodePath += n.Text()
  843. }
  844. searchLinks[nodePath] = n.ID
  845. return ast.WalkContinue
  846. })
  847. }
  848. }
  849. func convertWikiLinksAndTags() {
  850. for _, tree := range importTrees {
  851. convertWikiLinksAndTags0(tree)
  852. }
  853. }
  854. func convertWikiLinksAndTags0(tree *parse.Tree) {
  855. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  856. if !entering || ast.NodeText != n.Type {
  857. return ast.WalkContinue
  858. }
  859. text := n.TokensStr()
  860. length := len(text)
  861. start, end := 0, length
  862. for {
  863. part := text[start:end]
  864. if idx := strings.Index(part, "]]"); 0 > idx {
  865. break
  866. } else {
  867. end = start + idx
  868. }
  869. if idx := strings.Index(part, "[["); 0 > idx {
  870. break
  871. } else {
  872. start += idx
  873. }
  874. if end <= start {
  875. break
  876. }
  877. link := path.Join(path.Dir(tree.HPath), text[start+2:end]) // 统一转为绝对路径方便后续查找
  878. linkText := path.Base(link)
  879. dynamicAnchorText := true
  880. if linkParts := strings.Split(link, "|"); 1 < len(linkParts) {
  881. link = linkParts[0]
  882. linkText = linkParts[1]
  883. dynamicAnchorText = false
  884. }
  885. link, linkText = strings.TrimSpace(link), strings.TrimSpace(linkText)
  886. if !strings.Contains(link, "#") {
  887. link += "#" // 在结尾统一带上锚点方便后续查找
  888. }
  889. id := searchLinkID(link)
  890. if "" == id {
  891. start, end = end, length
  892. continue
  893. }
  894. linkText = strings.TrimPrefix(linkText, "/")
  895. repl := "((" + id + " '" + linkText + "'))"
  896. if !dynamicAnchorText {
  897. repl = "((" + id + " \"" + linkText + "\"))"
  898. }
  899. end += 2
  900. text = text[:start] + repl + text[end:]
  901. start, end = start+len(repl), len(text)
  902. length = end
  903. }
  904. text = convertTags(text) // 导入标签语法
  905. n.Tokens = gulu.Str.ToBytes(text)
  906. return ast.WalkContinue
  907. })
  908. }
  // convertTags rewrites space-delimited "#tag" markers in text to the closed
  // form "#tag#" and returns the result.
  //
  // A '#' opens a tag when it is the first byte, follows a space, or appears
  // while a tag is already open (the new '#' then becomes the opening marker).
  // A '#' that is immediately followed by another '#' never opens a tag and
  // cancels an open one. An open tag is closed by inserting '#' before the next
  // space, or by appending '#' at the end of the text.
  //
  // A tag with an empty body is not closed, so a lone "#" and an already closed
  // "#tag#" are returned unchanged instead of gaining a second '#'.
  func convertTags(text string) string {
  	var out strings.Builder
  	out.Grow(len(text) + 8)

  	// open is the index in text of the '#' that started the current tag,
  	// or -1 when no tag is open.
  	open := -1
  	for i := 0; i < len(text); i++ {
  		c := text[i]
  		switch {
  		case '#' == c && (0 == i || ' ' == text[i-1] || -1 < open):
  			if i+1 < len(text) && '#' == text[i+1] {
  				open = -1
  			} else {
  				open = i
  			}
  		case ' ' == c && -1 < open:
  			// Only close tags that have at least one character after the '#'.
  			if i > open+1 {
  				out.WriteByte('#')
  			}
  			open = -1
  		}
  		out.WriteByte(c)
  	}

  	// A non-empty tag that runs to the end of the text still needs its closer.
  	if -1 < open && open < len(text)-1 {
  		out.WriteByte('#')
  	}
  	return out.String()
  }
  934. // buildBlockRefInText 将文本节点进行结构化处理。
  935. func buildBlockRefInText() {
  936. lute := NewLute()
  937. lute.SetHTMLTag2TextMark(true)
  938. for _, tree := range importTrees {
  939. tree.MergeText()
  940. var unlinkTextNodes []*ast.Node
  941. ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
  942. if !entering || ast.NodeText != n.Type {
  943. return ast.WalkContinue
  944. }
  945. if nil == n.Tokens {
  946. return ast.WalkContinue
  947. }
  948. t := parse.Inline("", n.Tokens, lute.ParseOptions) // 使用行级解析
  949. parse.NestedInlines2FlattedSpans(t)
  950. var children []*ast.Node
  951. for c := t.Root.FirstChild.FirstChild; nil != c; c = c.Next {
  952. children = append(children, c)
  953. }
  954. for _, c := range children {
  955. n.InsertBefore(c)
  956. }
  957. unlinkTextNodes = append(unlinkTextNodes, n)
  958. return ast.WalkContinue
  959. })
  960. for _, node := range unlinkTextNodes {
  961. node.Unlink()
  962. }
  963. }
  964. }
  965. func searchLinkID(link string) (id string) {
  966. id = searchLinks[link]
  967. if "" != id {
  968. return
  969. }
  970. baseName := path.Base(link)
  971. for searchLink, searchID := range searchLinks {
  972. if path.Base(searchLink) == baseName {
  973. return searchID
  974. }
  975. }
  976. return
  977. }