- // SiYuan - Refactor your thinking
- // Copyright (c) 2020-present, b3log.org
- //
- // This program is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Affero General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Affero General Public License for more details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with this program. If not, see <https://www.gnu.org/licenses/>.
- package model
- import (
- "bytes"
- "encoding/base64"
- "encoding/json"
- "errors"
- "fmt"
- "image"
- "image/jpeg"
- "image/png"
- "io"
- "io/fs"
- "math/rand"
- "os"
- "path"
- "path/filepath"
- "regexp"
- "runtime/debug"
- "sort"
- "strconv"
- "strings"
- "time"
- "github.com/88250/gulu"
- "github.com/88250/lute/ast"
- "github.com/88250/lute/html"
- "github.com/88250/lute/html/atom"
- "github.com/88250/lute/parse"
- "github.com/88250/lute/render"
- "github.com/siyuan-note/filelock"
- "github.com/siyuan-note/logging"
- "github.com/siyuan-note/riff"
- "github.com/siyuan-note/siyuan/kernel/av"
- "github.com/siyuan-note/siyuan/kernel/cache"
- "github.com/siyuan-note/siyuan/kernel/filesys"
- "github.com/siyuan-note/siyuan/kernel/sql"
- "github.com/siyuan-note/siyuan/kernel/treenode"
- "github.com/siyuan-note/siyuan/kernel/util"
- )
- func HTML2Markdown(htmlStr string) (markdown string, err error) {
- assetDirPath := filepath.Join(util.DataDir, "assets")
- luteEngine := util.NewLute()
- tree := luteEngine.HTML2Tree(htmlStr)
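- // Rewrite inline base64 images: processBase64Img writes the decoded image under data/assets and points the link destination at the new file.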
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || ast.NodeLinkDest != n.Type {
- return ast.WalkContinue
- }
- dest := n.TokensStr()
- if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
- processBase64Img(n, dest, assetDirPath, err)
- return ast.WalkContinue
- }
- return ast.WalkContinue
- })
- var formatted []byte
- renderer := render.NewFormatRenderer(tree, luteEngine.RenderOptions)
- for nodeType, rendererFunc := range luteEngine.HTML2MdRendererFuncs {
- renderer.ExtRendererFuncs[nodeType] = rendererFunc
- }
- formatted = renderer.Render()
- markdown = gulu.Str.FromBytes(formatted)
- return
- }
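- // ImportSY imports a .sy.zip package into box boxID under toPath, regenerating block IDs, database (attribute view) IDs and flashcard bindings along the way.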
- func ImportSY(zipPath, boxID, toPath string) (err error) {
- util.PushEndlessProgress(Conf.Language(73))
- defer util.ClearPushProgress(100)
- lockSync()
- defer unlockSync()
- baseName := filepath.Base(zipPath)
- ext := filepath.Ext(baseName)
- baseName = strings.TrimSuffix(baseName, ext)
- unzipPath := filepath.Join(filepath.Dir(zipPath), baseName+"-"+gulu.Rand.String(7))
- err = gulu.Zip.Unzip(zipPath, unzipPath)
- if nil != err {
- return
- }
- defer os.RemoveAll(unzipPath)
- var syPaths []string
- filelock.Walk(unzipPath, func(path string, info fs.FileInfo, err error) error {
- if nil != err {
- return err
- }
- if !info.IsDir() && strings.HasSuffix(info.Name(), ".sy") {
- syPaths = append(syPaths, path)
- }
- return nil
- })
- entries, err := os.ReadDir(unzipPath)
- if nil != err {
- logging.LogErrorf("read unzip dir [%s] failed: %s", unzipPath, err)
- return
- }
- if 1 != len(entries) {
- logging.LogErrorf("invalid .sy.zip [%v]", entries)
- return errors.New(Conf.Language(199))
- }
- unzipRootPath := filepath.Join(unzipPath, entries[0].Name())
- name := filepath.Base(unzipRootPath)
- if strings.HasPrefix(name, "data-20") && len("data-20230321175442") == len(name) {
- logging.LogErrorf("invalid .sy.zip [unzipRootPath=%s, baseName=%s]", unzipRootPath, name)
- return errors.New(Conf.Language(199))
- }
- luteEngine := util.NewLute()
- blockIDs := map[string]string{}
- avBlockIDs := map[string]string{}
- trees := map[string]*parse.Tree{}
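- // blockIDs maps every old block ID to its regenerated ID; avBlockIDs tracks the subset of blocks that carry database (attribute view) bindings.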
- // Regenerate block IDs
- for _, syPath := range syPaths {
- data, readErr := os.ReadFile(syPath)
- if nil != readErr {
- logging.LogErrorf("read .sy [%s] failed: %s", syPath, readErr)
- err = readErr
- return
- }
- tree, _, parseErr := filesys.ParseJSON(data, luteEngine.ParseOptions)
- if nil != parseErr {
- logging.LogErrorf("parse .sy [%s] failed: %s", syPath, parseErr)
- err = parseErr
- return
- }
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || "" == n.ID {
- return ast.WalkContinue
- }
- // The new ID keeps the original timestamp part and only regenerates the random suffix, so a changed timestamp cannot make the updated time earlier than the creation time
- // Keep original creation time when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9923
- newNodeID := util.TimeFromID(n.ID) + "-" + util.RandString(7)
- blockIDs[n.ID] = newNodeID
- oldNodeID := n.ID
- n.ID = newNodeID
- n.SetIALAttr("id", newNodeID)
- // Track blocks that carry database (attribute view) attributes so their bindings can be re-pointed later
- for _, kv := range n.KramdownIAL {
- if 2 > len(kv) {
- continue
- }
- if strings.HasPrefix(kv[0], av.NodeAttrNameAvs) {
- avBlockIDs[oldNodeID] = newNodeID
- }
- }
- return ast.WalkContinue
- })
- tree.ID = tree.Root.ID
- tree.Path = filepath.ToSlash(strings.TrimPrefix(syPath, unzipRootPath))
- trees[tree.ID] = tree
- }
- // Point refs and embeds at the regenerated block IDs
- for _, tree := range trees {
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering {
- return ast.WalkContinue
- }
- if treenode.IsBlockRef(n) {
- defID, _, _ := treenode.GetBlockRef(n)
- newDefID := blockIDs[defID]
- if "" != newDefID {
- n.TextMarkBlockRefID = newDefID
- }
- } else if ast.NodeTextMark == n.Type && n.IsTextMarkType("a") && strings.HasPrefix(n.TextMarkAHref, "siyuan://blocks/") {
- // Block hyperlinks do not point to regenerated block IDs when importing .sy.zip https://github.com/siyuan-note/siyuan/issues/9083
- defID := strings.TrimPrefix(n.TextMarkAHref, "siyuan://blocks/")
- newDefID := blockIDs[defID]
- if "" != newDefID {
- n.TextMarkAHref = "siyuan://blocks/" + newDefID
- }
- } else if ast.NodeBlockQueryEmbedScript == n.Type {
- for oldID, newID := range blockIDs {
- // Query embed blocks are broken after importing `.sy.zip` https://github.com/siyuan-note/siyuan/issues/5316
- n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(oldID), []byte(newID))
- }
- }
- return ast.WalkContinue
- })
- }
- // Move the associated database (attribute view) files into data/storage/av/
- storage := filepath.Join(unzipRootPath, "storage")
- storageAvDir := filepath.Join(storage, "av")
- avIDs := map[string]string{}
- renameAvPaths := map[string]string{}
- if gulu.File.IsExist(storageAvDir) {
- // Regenerate database data
- filelock.Walk(storageAvDir, func(path string, info fs.FileInfo, err error) error {
- if !strings.HasSuffix(path, ".json") || !ast.IsNodeIDPattern(strings.TrimSuffix(info.Name(), ".json")) {
- return nil
- }
- // Rename the database
- newAvID := ast.NewNodeID()
- oldAvID := strings.TrimSuffix(info.Name(), ".json")
- newPath := filepath.Join(filepath.Dir(path), newAvID+".json")
- renameAvPaths[path] = newPath
- avIDs[oldAvID] = newAvID
- return nil
- })
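- // The walk above only records the pending renames; they are applied in a second pass below.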
- // Rename the database files
- for oldPath, newPath := range renameAvPaths {
- data, readErr := os.ReadFile(oldPath)
- if nil != readErr {
- logging.LogErrorf("read av file [%s] failed: %s", oldPath, readErr)
- return nil
- }
- // Replace block IDs inside the database file with the new block IDs
- newData := data
- for oldAvID, newAvID := range avIDs {
- for oldID, newID := range avBlockIDs {
- newData = bytes.ReplaceAll(newData, []byte(oldID), []byte(newID))
- }
- newData = bytes.ReplaceAll(newData, []byte(oldAvID), []byte(newAvID))
- }
- if !bytes.Equal(data, newData) {
- if writeErr := os.WriteFile(oldPath, newData, 0644); nil != writeErr {
- logging.LogErrorf("write av file [%s] failed: %s", oldPath, writeErr)
- return nil
- }
- }
- if err = os.Rename(oldPath, newPath); nil != err {
- logging.LogErrorf("rename av file from [%s] to [%s] failed: %s", oldPath, newPath, err)
- return
- }
- }
- targetStorageAvDir := filepath.Join(util.DataDir, "storage", "av")
- if copyErr := filelock.Copy(storageAvDir, targetStorageAvDir); nil != copyErr {
- logging.LogErrorf("copy storage av dir from [%s] to [%s] failed: %s", storageAvDir, targetStorageAvDir, copyErr)
- }
- // Re-point database attribute values to the new database IDs
- for _, tree := range trees {
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || "" == n.ID {
- return ast.WalkContinue
- }
- ial := parse.IAL2Map(n.KramdownIAL)
- for k, v := range ial {
- if strings.HasPrefix(k, av.NodeAttrNameAvs) {
- newKey, newVal := k, v
- for oldAvID, newAvID := range avIDs {
- newKey = strings.ReplaceAll(newKey, oldAvID, newAvID)
- newVal = strings.ReplaceAll(newVal, oldAvID, newAvID)
- }
- n.RemoveIALAttr(k)
- n.SetIALAttr(newKey, newVal)
- }
- }
- if ast.NodeAttributeView == n.Type {
- n.AttributeViewID = avIDs[n.AttributeViewID]
- }
- return ast.WalkContinue
- })
- // Associate databases with blocks
- avNodes := tree.Root.ChildrenByType(ast.NodeAttributeView)
- av.BatchUpsertBlockRel(avNodes)
- }
- // Handle blocks bound in the databases that are not part of the imported documents: update their attributes in their own trees
- cachedTrees, saveTrees := map[string]*parse.Tree{}, map[string]*parse.Tree{}
- for _, avID := range avIDs {
- attrView, _ := av.ParseAttributeView(avID)
- if nil == attrView {
- continue
- }
- blockKeyValues := attrView.GetBlockKeyValues()
- for _, blockValue := range blockKeyValues.Values {
- if blockValue.IsDetached {
- continue
- }
- bt := treenode.GetBlockTree(blockValue.BlockID)
- if nil == bt {
- continue
- }
- tree := cachedTrees[bt.RootID]
- if nil == tree {
- tree, _ = filesys.LoadTree(bt.BoxID, bt.Path, luteEngine)
- if nil == tree {
- continue
- }
- cachedTrees[bt.RootID] = tree
- }
- node := treenode.GetNodeInTree(tree, blockValue.BlockID)
- if nil == node {
- continue
- }
- attrs := parse.IAL2Map(node.KramdownIAL)
- if "" == attrs[av.NodeAttrNameAvs] {
- attrs[av.NodeAttrNameAvs] = avID
- } else {
- nodeAvIDs := strings.Split(attrs[av.NodeAttrNameAvs], ",")
- nodeAvIDs = append(nodeAvIDs, avID)
- nodeAvIDs = gulu.Str.RemoveDuplicatedElem(nodeAvIDs)
- attrs[av.NodeAttrNameAvs] = strings.Join(nodeAvIDs, ",")
- saveTrees[bt.RootID] = tree
- }
- avNames := getAvNames(attrs[av.NodeAttrNameAvs])
- if "" != avNames {
- attrs[av.NodeAttrViewNames] = avNames
- }
- oldAttrs, setErr := setNodeAttrs0(node, attrs)
- if nil != setErr {
- continue
- }
- cache.PutBlockIAL(node.ID, parse.IAL2Map(node.KramdownIAL))
- pushBroadcastAttrTransactions(oldAttrs, node)
- }
- }
- for _, saveTree := range saveTrees {
- if treeErr := indexWriteTreeUpsertQueue(saveTree); nil != treeErr {
- logging.LogErrorf("index write tree upsert queue failed: %s", treeErr)
- }
- avNodes := saveTree.Root.ChildrenByType(ast.NodeAttributeView)
- av.BatchUpsertBlockRel(avNodes)
- }
- }
- // Merge the associated flashcard data into the built-in deck data/storage/riff/20230218211946-2kw8jgx
- storageRiffDir := filepath.Join(storage, "riff")
- if gulu.File.IsExist(storageRiffDir) {
- deckToImport, loadErr := riff.LoadDeck(storageRiffDir, builtinDeckID, Conf.Flashcard.RequestRetention, Conf.Flashcard.MaximumInterval, Conf.Flashcard.Weights)
- if nil != loadErr {
- logging.LogErrorf("load deck [%s] failed: %s", name, loadErr)
- } else {
- deck := Decks[builtinDeckID]
- if nil == deck {
- var createErr error
- deck, createErr = createDeck0("Built-in Deck", builtinDeckID)
- if nil == createErr {
- Decks[deck.ID] = deck
- }
- }
- bIDs := deckToImport.GetBlockIDs()
- cards := deckToImport.GetCardsByBlockIDs(bIDs)
- for _, card := range cards {
- deck.AddCard(card.ID(), blockIDs[card.BlockID()])
- }
- if 0 < len(cards) {
- if saveErr := deck.Save(); nil != saveErr {
- logging.LogErrorf("save deck [%s] failed: %s", name, saveErr)
- }
- }
- }
- }
- // The storage folder has already been handled above, so remove the source storage folder here to keep it from being copied into the import target dir targetDir later
- if removeErr := os.RemoveAll(storage); nil != removeErr {
- logging.LogErrorf("remove temp storage av dir failed: %s", removeErr)
- }
- // Write the trees back to .sy files
- for _, tree := range trees {
- syPath := filepath.Join(unzipRootPath, tree.Path)
- if "" == tree.Root.Spec {
- parse.NestedInlines2FlattedSpans(tree, false)
- tree.Root.Spec = "1"
- }
- renderer := render.NewJSONRenderer(tree, luteEngine.RenderOptions)
- data := renderer.Render()
- if !util.UseSingleLineSave {
- buf := bytes.Buffer{}
- buf.Grow(1024 * 1024 * 2)
- if err = json.Indent(&buf, data, "", "\t"); nil != err {
- return
- }
- data = buf.Bytes()
- }
- if err = os.WriteFile(syPath, data, 0644); nil != err {
- logging.LogErrorf("write .sy [%s] failed: %s", syPath, err)
- return
- }
- newSyPath := filepath.Join(filepath.Dir(syPath), tree.ID+".sy")
- if err = filelock.Rename(syPath, newSyPath); nil != err {
- logging.LogErrorf("rename .sy from [%s] to [%s] failed: %s", syPath, newSyPath, err)
- return
- }
- }
- // Merge sort.json
- fullSortIDs := map[string]int{}
- sortIDs := map[string]int{}
- var sortData []byte
- var sortErr error
- sortPath := filepath.Join(unzipRootPath, ".siyuan", "sort.json")
- if filelock.IsExist(sortPath) {
- sortData, sortErr = filelock.ReadFile(sortPath)
- if nil != sortErr {
- logging.LogErrorf("read import sort conf failed: %s", sortErr)
- }
- if sortErr = gulu.JSON.UnmarshalJSON(sortData, &sortIDs); nil != sortErr {
- logging.LogErrorf("unmarshal sort conf failed: %s", sortErr)
- }
- boxSortPath := filepath.Join(util.DataDir, boxID, ".siyuan", "sort.json")
- if filelock.IsExist(boxSortPath) {
- sortData, sortErr = filelock.ReadFile(boxSortPath)
- if nil != sortErr {
- logging.LogErrorf("read box sort conf failed: %s", sortErr)
- }
- if sortErr = gulu.JSON.UnmarshalJSON(sortData, &fullSortIDs); nil != sortErr {
- logging.LogErrorf("unmarshal box sort conf failed: %s", sortErr)
- }
- }
- for oldID, sort := range sortIDs {
- if newID := blockIDs[oldID]; "" != newID {
- fullSortIDs[newID] = sort
- }
- }
- sortData, sortErr = gulu.JSON.MarshalJSON(fullSortIDs)
- if nil != sortErr {
- logging.LogErrorf("marshal box full sort conf failed: %s", sortErr)
- } else {
- sortErr = filelock.WriteFile(boxSortPath, sortData)
- if nil != sortErr {
- logging.LogErrorf("write box full sort conf failed: %s", sortErr)
- }
- }
- if removeErr := os.RemoveAll(sortPath); nil != removeErr {
- logging.LogErrorf("remove temp sort conf failed: %s", removeErr)
- }
- }
- // Rename file paths
- renamePaths := map[string]string{}
- filelock.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
- if nil != err {
- return err
- }
- if info.IsDir() && ast.IsNodeIDPattern(info.Name()) {
- renamePaths[path] = path
- }
- return nil
- })
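- // Rebuild each collected path segment by segment, replacing old document IDs with their regenerated counterparts.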
- for p := range renamePaths {
- originalPath := p
- p = strings.TrimPrefix(p, unzipRootPath)
- p = filepath.ToSlash(p)
- parts := strings.Split(p, "/")
- buf := bytes.Buffer{}
- buf.WriteString("/")
- for i, part := range parts {
- if "" == part {
- continue
- }
- newNodeID := blockIDs[part]
- if "" != newNodeID {
- buf.WriteString(newNodeID)
- } else {
- buf.WriteString(part)
- }
- if i < len(parts)-1 {
- buf.WriteString("/")
- }
- }
- newPath := buf.String()
- renamePaths[originalPath] = filepath.Join(unzipRootPath, newPath)
- }
- var oldPaths []string
- for oldPath := range renamePaths {
- oldPaths = append(oldPaths, oldPath)
- }
- sort.Slice(oldPaths, func(i, j int) bool {
- return strings.Count(oldPaths[i], string(os.PathSeparator)) < strings.Count(oldPaths[j], string(os.PathSeparator))
- })
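- // Renaming shallower paths first ensures parent directories are moved before their children; the bookkeeping below re-keys the remaining entries after each rename.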
- for i, oldPath := range oldPaths {
- newPath := renamePaths[oldPath]
- if err = filelock.Rename(oldPath, newPath); nil != err {
- logging.LogErrorf("rename path from [%s] to [%s] failed: %s", oldPath, renamePaths[oldPath], err)
- return errors.New("rename path failed")
- }
- delete(renamePaths, oldPath)
- var toRemoves []string
- newRenamedPaths := map[string]string{}
- for oldP, newP := range renamePaths {
- if strings.HasPrefix(oldP, oldPath) {
- renamedOldP := strings.Replace(oldP, oldPath, newPath, 1)
- newRenamedPaths[renamedOldP] = newP
- toRemoves = append(toRemoves, oldP)
- }
- }
- for _, toRemove := range toRemoves {
- delete(renamePaths, toRemove)
- }
- for oldP, newP := range newRenamedPaths {
- renamePaths[oldP] = newP
- }
- for j := i + 1; j < len(oldPaths); j++ {
- if strings.HasPrefix(oldPaths[j], oldPath) {
- renamedOldP := strings.Replace(oldPaths[j], oldPath, newPath, 1)
- oldPaths[j] = renamedOldP
- }
- }
- }
- // Move all bundled asset files into data/assets/
- var assetsDirs []string
- filelock.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
- if strings.Contains(path, "assets") && info.IsDir() {
- assetsDirs = append(assetsDirs, path)
- }
- return nil
- })
- dataAssets := filepath.Join(util.DataDir, "assets")
- for _, assets := range assetsDirs {
- if gulu.File.IsDir(assets) {
- if err = filelock.Copy(assets, dataAssets); nil != err {
- logging.LogErrorf("copy assets from [%s] to [%s] failed: %s", assets, dataAssets, err)
- return
- }
- }
- os.RemoveAll(assets)
- }
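- // Resolve the import target: "/" means the notebook root; otherwise the target document's path (without the .sy suffix) becomes the parent directory.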
- var baseTargetPath string
- if "/" == toPath {
- baseTargetPath = "/"
- } else {
- block := treenode.GetBlockTreeRootByPath(boxID, toPath)
- if nil == block {
- logging.LogErrorf("not found block by path [%s]", toPath)
- return nil
- }
- baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
- }
- targetDir := filepath.Join(util.DataDir, boxID, baseTargetPath)
- if err = os.MkdirAll(targetDir, 0755); nil != err {
- return
- }
- var treePaths []string
- filelock.Walk(unzipRootPath, func(path string, info fs.FileInfo, err error) error {
- if info.IsDir() {
- if strings.HasPrefix(info.Name(), ".") {
- return filepath.SkipDir
- }
- return nil
- }
- if !strings.HasSuffix(info.Name(), ".sy") {
- return nil
- }
- p := strings.TrimPrefix(path, unzipRootPath)
- p = filepath.ToSlash(p)
- treePaths = append(treePaths, p)
- return nil
- })
- if err = filelock.Copy(unzipRootPath, targetDir); nil != err {
- logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", unzipRootPath, util.DataDir, err)
- err = errors.New("copy data failed")
- return
- }
- boxAbsPath := filepath.Join(util.DataDir, boxID)
- for _, treePath := range treePaths {
- absPath := filepath.Join(targetDir, treePath)
- p := strings.TrimPrefix(absPath, boxAbsPath)
- p = filepath.ToSlash(p)
- tree, err := filesys.LoadTree(boxID, p, luteEngine)
- if nil != err {
- logging.LogErrorf("load tree [%s] failed: %s", treePath, err)
- continue
- }
- treenode.IndexBlockTree(tree)
- sql.IndexTreeQueue(tree)
- }
- IncSync()
- return
- }
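- // ImportData restores a data.zip backup by copying its single top-level directory over util.DataDir and then triggering a full reindex.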
- func ImportData(zipPath string) (err error) {
- util.PushEndlessProgress(Conf.Language(73))
- defer util.ClearPushProgress(100)
- lockSync()
- defer unlockSync()
- baseName := filepath.Base(zipPath)
- ext := filepath.Ext(baseName)
- baseName = strings.TrimSuffix(baseName, ext)
- unzipPath := filepath.Join(filepath.Dir(zipPath), baseName)
- err = gulu.Zip.Unzip(zipPath, unzipPath)
- if nil != err {
- return
- }
- defer os.RemoveAll(unzipPath)
- files, err := filepath.Glob(filepath.Join(unzipPath, "*/*.sy"))
- if nil != err {
- logging.LogErrorf("check data.zip failed: %s", err)
- return errors.New("check data.zip failed")
- }
- if 0 < len(files) {
- return errors.New(Conf.Language(198))
- }
- dirs, err := os.ReadDir(unzipPath)
- if nil != err {
- logging.LogErrorf("check data.zip failed: %s", err)
- return errors.New("check data.zip failed")
- }
- if 1 != len(dirs) {
- return errors.New(Conf.Language(198))
- }
- tmpDataPath := filepath.Join(unzipPath, dirs[0].Name())
- if err = filelock.Copy(tmpDataPath, util.DataDir); nil != err {
- logging.LogErrorf("copy data dir from [%s] to [%s] failed: %s", tmpDataPath, util.DataDir, err)
- err = errors.New("copy data failed")
- return
- }
- IncSync()
- FullReindex()
- return
- }
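- // ImportFromLocalPath imports a local Markdown file, or a directory tree of Markdown files, into box boxID under toPath, copying referenced assets into the resolved assets directory.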
- func ImportFromLocalPath(boxID, localPath string, toPath string) (err error) {
- util.PushEndlessProgress(Conf.Language(73))
- defer func() {
- util.PushClearProgress()
- if e := recover(); nil != e {
- stack := debug.Stack()
- msg := fmt.Sprintf("PANIC RECOVERED: %v\n\t%s\n", e, stack)
- logging.LogErrorf("import from local path failed: %s", msg)
- err = errors.New("import from local path failed, please check kernel log for details")
- }
- }()
- lockSync()
- defer unlockSync()
- WaitForWritingFiles()
- var baseHPath, baseTargetPath, boxLocalPath string
- if "/" == toPath {
- baseHPath = "/"
- baseTargetPath = "/"
- } else {
- block := treenode.GetBlockTreeRootByPath(boxID, toPath)
- if nil == block {
- logging.LogErrorf("not found block by path [%s]", toPath)
- return nil
- }
- baseHPath = block.HPath
- baseTargetPath = strings.TrimSuffix(block.Path, ".sy")
- }
- boxLocalPath = filepath.Join(util.DataDir, boxID)
- if gulu.File.IsDir(localPath) {
- // Collect all asset files
- assets := map[string]string{}
- filelock.Walk(localPath, func(currentPath string, info os.FileInfo, walkErr error) error {
- if localPath == currentPath {
- return nil
- }
- if strings.HasPrefix(info.Name(), ".") {
- if info.IsDir() {
- return filepath.SkipDir
- }
- return nil
- }
- if !strings.HasSuffix(info.Name(), ".md") && !strings.HasSuffix(info.Name(), ".markdown") {
- assets[currentPath] = currentPath
- return nil
- }
- return nil
- })
- targetPaths := map[string]string{}
- assetsDone := map[string]string{}
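- // targetPaths maps each source-relative path to its generated .sy path; assetsDone remembers assets that were already copied so each one is copied only once.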
- // Convert Markdown files to .sy trees
- filelock.Walk(localPath, func(currentPath string, info os.FileInfo, walkErr error) error {
- if strings.HasPrefix(info.Name(), ".") {
- if info.IsDir() {
- return filepath.SkipDir
- }
- return nil
- }
- var tree *parse.Tree
- var ext string
- title := info.Name()
- if !info.IsDir() {
- ext = path.Ext(info.Name())
- title = strings.TrimSuffix(info.Name(), ext)
- }
- id := ast.NewNodeID()
- curRelPath := filepath.ToSlash(strings.TrimPrefix(currentPath, localPath))
- targetPath := path.Join(baseTargetPath, id)
- hPath := path.Join(baseHPath, filepath.ToSlash(strings.TrimPrefix(currentPath, localPath)))
- hPath = strings.TrimSuffix(hPath, ext)
- if "" == curRelPath {
- curRelPath = "/"
- hPath = "/" + title
- } else {
- dirPath := targetPaths[path.Dir(curRelPath)]
- targetPath = path.Join(dirPath, id)
- }
- targetPath = strings.ReplaceAll(targetPath, ".sy/", "/")
- targetPath += ".sy"
- targetPaths[curRelPath] = targetPath
- if info.IsDir() {
- tree = treenode.NewTree(boxID, targetPath, hPath, title)
- importTrees = append(importTrees, tree)
- return nil
- }
- if !strings.HasSuffix(info.Name(), ".md") && !strings.HasSuffix(info.Name(), ".markdown") {
- return nil
- }
- data, readErr := os.ReadFile(currentPath)
- if nil != readErr {
- err = readErr
- return io.EOF
- }
- tree = parseStdMd(data)
- if nil == tree {
- logging.LogErrorf("parse tree [%s] failed", currentPath)
- return nil
- }
- tree.ID = id
- tree.Root.ID = id
- tree.Root.SetIALAttr("id", tree.Root.ID)
- tree.Root.SetIALAttr("title", title)
- tree.Box = boxID
- targetPath = path.Join(path.Dir(targetPath), tree.Root.ID+".sy")
- tree.Path = targetPath
- targetPaths[curRelPath] = targetPath
- tree.HPath = hPath
- tree.Root.Spec = "1"
- docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
- assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
- currentDir := filepath.Dir(currentPath)
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) {
- return ast.WalkContinue
- }
- var dest string
- if ast.NodeLinkDest == n.Type {
- dest = n.TokensStr()
- } else {
- dest = n.TextMarkAHref
- }
- if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
- processBase64Img(n, dest, assetDirPath, err)
- return ast.WalkContinue
- }
- dest = strings.ReplaceAll(dest, "%20", " ")
- dest = strings.ReplaceAll(dest, "%5C", "/")
- if ast.NodeLinkDest == n.Type {
- n.Tokens = []byte(dest)
- } else {
- n.TextMarkAHref = dest
- }
- if !util.IsRelativePath(dest) {
- return ast.WalkContinue
- }
- dest = filepath.ToSlash(dest)
- if "" == dest {
- return ast.WalkContinue
- }
- absDest := filepath.Join(currentDir, dest)
- fullPath, exist := assets[absDest]
- if !exist {
- absDest = filepath.Join(currentDir, string(html.DecodeDestination([]byte(dest))))
- }
- fullPath, exist = assets[absDest]
- if exist {
- existName := assetsDone[absDest]
- var name string
- if "" == existName {
- name = filepath.Base(fullPath)
- name = util.AssetName(name)
- assetTargetPath := filepath.Join(assetDirPath, name)
- if err = filelock.Copy(fullPath, assetTargetPath); nil != err {
- logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", fullPath, assetTargetPath, err)
- return ast.WalkContinue
- }
- assetsDone[absDest] = name
- } else {
- name = existName
- }
- if ast.NodeLinkDest == n.Type {
- n.Tokens = []byte("assets/" + name)
- } else {
- n.TextMarkAHref = "assets/" + name
- }
- }
- return ast.WalkContinue
- })
- reassignIDUpdated(tree)
- importTrees = append(importTrees, tree)
- return nil
- })
- } else { // Import a single file
- fileName := filepath.Base(localPath)
- if !strings.HasSuffix(fileName, ".md") && !strings.HasSuffix(fileName, ".markdown") {
- return errors.New(Conf.Language(79))
- }
- title := strings.TrimSuffix(fileName, ".markdown")
- title = strings.TrimSuffix(title, ".md")
- targetPath := strings.TrimSuffix(toPath, ".sy")
- id := ast.NewNodeID()
- targetPath = path.Join(targetPath, id+".sy")
- var data []byte
- data, err = os.ReadFile(localPath)
- if nil != err {
- return err
- }
- tree := parseStdMd(data)
- if nil == tree {
- msg := fmt.Sprintf("parse tree [%s] failed", localPath)
- logging.LogErrorf(msg)
- return errors.New(msg)
- }
- tree.ID = id
- tree.Root.ID = id
- tree.Root.SetIALAttr("id", tree.Root.ID)
- tree.Root.SetIALAttr("title", title)
- tree.Box = boxID
- tree.Path = targetPath
- tree.HPath = path.Join(baseHPath, title)
- tree.Root.Spec = "1"
- docDirLocalPath := filepath.Dir(filepath.Join(boxLocalPath, targetPath))
- assetDirPath := getAssetsDir(boxLocalPath, docDirLocalPath)
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || (ast.NodeLinkDest != n.Type && !n.IsTextMarkType("a")) {
- return ast.WalkContinue
- }
- var dest string
- if ast.NodeLinkDest == n.Type {
- dest = n.TokensStr()
- } else {
- dest = n.TextMarkAHref
- }
- if strings.HasPrefix(dest, "data:image") && strings.Contains(dest, ";base64,") {
- processBase64Img(n, dest, assetDirPath, err)
- return ast.WalkContinue
- }
- dest = strings.ReplaceAll(dest, "%20", " ")
- dest = strings.ReplaceAll(dest, "%5C", "/")
- if ast.NodeLinkDest == n.Type {
- n.Tokens = []byte(dest)
- } else {
- n.TextMarkAHref = dest
- }
- if !util.IsRelativePath(dest) {
- return ast.WalkContinue
- }
- dest = filepath.ToSlash(dest)
- if "" == dest {
- return ast.WalkContinue
- }
- absolutePath := filepath.Join(filepath.Dir(localPath), dest)
- exist := gulu.File.IsExist(absolutePath)
- if !exist {
- absolutePath = filepath.Join(filepath.Dir(localPath), string(html.DecodeDestination([]byte(dest))))
- exist = gulu.File.IsExist(absolutePath)
- }
- if exist {
- name := filepath.Base(absolutePath)
- name = util.AssetName(name)
- assetTargetPath := filepath.Join(assetDirPath, name)
- if err = filelock.Copy(absolutePath, assetTargetPath); nil != err {
- logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", absolutePath, assetTargetPath, err)
- return ast.WalkContinue
- }
- if ast.NodeLinkDest == n.Type {
- n.Tokens = []byte("assets/" + name)
- } else {
- n.TextMarkAHref = "assets/" + name
- }
- }
- return ast.WalkContinue
- })
- reassignIDUpdated(tree)
- importTrees = append(importTrees, tree)
- }
- if 0 < len(importTrees) {
- initSearchLinks()
- convertWikiLinksAndTags()
- buildBlockRefInText()
- for i, tree := range importTrees {
- indexWriteTreeIndexQueue(tree)
- if 0 == i%4 {
- util.PushEndlessProgress(fmt.Sprintf(Conf.Language(66), fmt.Sprintf("%d/%d ", i, len(importTrees))+tree.HPath))
- }
- }
- util.PushClearProgress()
- importTrees = []*parse.Tree{}
- searchLinks = map[string]string{}
- }
- IncSync()
- debug.FreeOSMemory()
- return
- }
- func parseStdMd(markdown []byte) (ret *parse.Tree) {
- luteEngine := util.NewStdLute()
- ret = parse.Parse("", markdown, luteEngine.ParseOptions)
- if nil == ret {
- return
- }
- genTreeID(ret)
- imgHtmlBlock2InlineImg(ret)
- parse.NestedInlines2FlattedSpansHybrid(ret, false)
- return
- }
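- // processBase64Img decodes an inline base64 image, re-encodes it as PNG or JPEG in a temp directory, copies it into the assets directory and repoints the node at the new asset.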
- func processBase64Img(n *ast.Node, dest string, assetDirPath string, err error) {
- base64TmpDir := filepath.Join(util.TempDir, "base64")
- os.MkdirAll(base64TmpDir, 0755)
- sep := strings.Index(dest, ";base64,")
- str := strings.TrimSpace(dest[sep+8:])
- re := regexp.MustCompile(`(?i)%0A`)
- str = re.ReplaceAllString(str, "\n")
- var decodeErr error
- unbased, decodeErr := base64.StdEncoding.DecodeString(str)
- if nil != decodeErr {
- logging.LogErrorf("decode base64 image failed: %s", decodeErr)
- return
- }
- dataReader := bytes.NewReader(unbased)
- var img image.Image
- var ext string
- typ := dest[5:sep]
- switch typ {
- case "image/png":
- img, decodeErr = png.Decode(dataReader)
- ext = ".png"
- case "image/jpeg":
- img, decodeErr = jpeg.Decode(dataReader)
- ext = ".jpg"
- default:
- logging.LogWarnf("unsupported base64 image type [%s]", typ)
- return
- }
- if nil != decodeErr {
- logging.LogErrorf("decode base64 image failed: %s", decodeErr)
- return
- }
- name := "image" + ext
- alt := n.Parent.ChildByType(ast.NodeLinkText)
- if nil != alt {
- name = alt.TokensStr() + ext
- }
- name = util.FilterUploadFileName(name)
- name = util.AssetName(name)
- tmp := filepath.Join(base64TmpDir, name)
- tmpFile, openErr := os.OpenFile(tmp, os.O_RDWR|os.O_CREATE, 0644)
- if nil != openErr {
- logging.LogErrorf("open temp file [%s] failed: %s", tmp, openErr)
- return
- }
- var encodeErr error
- switch typ {
- case "image/png":
- encodeErr = png.Encode(tmpFile, img)
- case "image/jpeg":
- encodeErr = jpeg.Encode(tmpFile, img, &jpeg.Options{Quality: 100})
- }
- if nil != encodeErr {
- logging.LogErrorf("encode base64 image failed: %s", encodeErr)
- tmpFile.Close()
- return
- }
- tmpFile.Close()
- assetTargetPath := filepath.Join(assetDirPath, name)
- if err = filelock.Copy(tmp, assetTargetPath); nil != err {
- logging.LogErrorf("copy asset from [%s] to [%s] failed: %s", tmp, assetTargetPath, err)
- return
- }
- n.Tokens = []byte("assets/" + name)
- }
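- // imgHtmlBlock2InlineImg replaces HTML blocks that contain an <img> tag with equivalent inline image paragraphs.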
- func imgHtmlBlock2InlineImg(tree *parse.Tree) {
- imgHtmlBlocks := map[*ast.Node]*html.Node{}
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering {
- return ast.WalkContinue
- }
- if ast.NodeHTMLBlock == n.Type {
- htmlNodes, pErr := html.ParseFragment(bytes.NewReader(n.Tokens), &html.Node{Type: html.ElementNode})
- if nil != pErr {
- logging.LogErrorf("parse html block [%s] failed: %s", n.Tokens, pErr)
- return ast.WalkContinue
- }
- if 1 > len(htmlNodes) {
- return ast.WalkContinue
- }
- for _, htmlNode := range htmlNodes {
- if atom.Img == htmlNode.DataAtom {
- imgHtmlBlocks[n] = htmlNode
- break
- }
- }
- }
- return ast.WalkContinue
- })
- for n, htmlImg := range imgHtmlBlocks {
- src := domAttrValue(htmlImg, "src")
- alt := domAttrValue(htmlImg, "alt")
- title := domAttrValue(htmlImg, "title")
- p := &ast.Node{Type: ast.NodeParagraph, ID: n.ID}
- img := &ast.Node{Type: ast.NodeImage}
- p.AppendChild(img)
- img.AppendChild(&ast.Node{Type: ast.NodeBang})
- img.AppendChild(&ast.Node{Type: ast.NodeOpenBracket})
- img.AppendChild(&ast.Node{Type: ast.NodeLinkText, Tokens: []byte(alt)})
- img.AppendChild(&ast.Node{Type: ast.NodeCloseBracket})
- img.AppendChild(&ast.Node{Type: ast.NodeOpenParen})
- img.AppendChild(&ast.Node{Type: ast.NodeLinkDest, Tokens: []byte(src)})
- if "" != title {
- img.AppendChild(&ast.Node{Type: ast.NodeLinkSpace})
- img.AppendChild(&ast.Node{Type: ast.NodeLinkTitle})
- }
- img.AppendChild(&ast.Node{Type: ast.NodeCloseParen})
- n.InsertBefore(p)
- n.Unlink()
- }
- return
- }
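- // reassignIDUpdated gives every block a fresh ID whose timestamp part counts up one second at a time, starting blockCount seconds in the past, and syncs the id and updated IAL attributes accordingly.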
- func reassignIDUpdated(tree *parse.Tree) {
- var blockCount int
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || "" == n.ID {
- return ast.WalkContinue
- }
- blockCount++
- return ast.WalkContinue
- })
- ids := make([]string, blockCount)
- min, _ := strconv.ParseInt(time.Now().Add(-1*time.Duration(blockCount)*time.Second).Format("20060102150405"), 10, 64)
- for i := 0; i < blockCount; i++ {
- ids[i] = newID(fmt.Sprintf("%d", min))
- min++
- }
- var i int
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || "" == n.ID {
- return ast.WalkContinue
- }
- n.ID = ids[i]
- n.SetIALAttr("id", n.ID)
- n.SetIALAttr("updated", util.TimeFromID(n.ID))
- i++
- return ast.WalkContinue
- })
- tree.ID = tree.Root.ID
- tree.Path = path.Join(path.Dir(tree.Path), tree.ID+".sy")
- tree.Root.SetIALAttr("id", tree.Root.ID)
- }
- func newID(t string) string {
- return t + "-" + randStr(7)
- }
- func randStr(length int) string {
- letter := []rune("abcdefghijklmnopqrstuvwxyz0123456789")
- b := make([]rune, length)
- for i := range b {
- b[i] = letter[rand.Intn(len(letter))]
- }
- return string(b)
- }
- func domAttrValue(n *html.Node, attrName string) string {
- if nil == n {
- return ""
- }
- for _, attr := range n.Attr {
- if attr.Key == attrName {
- return attr.Val
- }
- }
- return ""
- }
- var importTrees []*parse.Tree
- var searchLinks = map[string]string{}
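- // initSearchLinks indexes every imported document and heading by its human-readable path (HPath plus optional heading anchor) so wiki links can later be resolved to block IDs.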
- func initSearchLinks() {
- for _, tree := range importTrees {
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || (ast.NodeDocument != n.Type && ast.NodeHeading != n.Type) {
- return ast.WalkContinue
- }
- nodePath := tree.HPath + "#"
- if ast.NodeHeading == n.Type {
- nodePath += n.Text()
- }
- searchLinks[nodePath] = n.ID
- return ast.WalkContinue
- })
- }
- }
- func convertWikiLinksAndTags() {
- for _, tree := range importTrees {
- convertWikiLinksAndTags0(tree)
- }
- }
- func convertWikiLinksAndTags0(tree *parse.Tree) {
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || ast.NodeText != n.Type {
- return ast.WalkContinue
- }
- text := n.TokensStr()
- length := len(text)
- start, end := 0, length
- for {
- part := text[start:end]
- if idx := strings.Index(part, "]]"); 0 > idx {
- break
- } else {
- end = start + idx
- }
- if idx := strings.Index(part, "[["); 0 > idx {
- break
- } else {
- start += idx
- }
- if end <= start {
- break
- }
- link := path.Join(path.Dir(tree.HPath), text[start+2:end]) // normalize to an absolute path to simplify later lookup
- linkText := path.Base(link)
- dynamicAnchorText := true
- if linkParts := strings.Split(link, "|"); 1 < len(linkParts) {
- link = linkParts[0]
- linkText = linkParts[1]
- dynamicAnchorText = false
- }
- link, linkText = strings.TrimSpace(link), strings.TrimSpace(linkText)
- if !strings.Contains(link, "#") {
- link += "#" // 在结尾统一带上锚点方便后续查找
- }
- id := searchLinkID(link)
- if "" == id {
- start, end = end, length
- continue
- }
- linkText = strings.TrimPrefix(linkText, "/")
- repl := "((" + id + " '" + linkText + "'))"
- if !dynamicAnchorText {
- repl = "((" + id + " \"" + linkText + "\"))"
- }
- end += 2
- text = text[:start] + repl + text[end:]
- start, end = start+len(repl), len(text)
- length = end
- }
- text = convertTags(text) // convert inline tag syntax
- n.Tokens = gulu.Str.ToBytes(text)
- return ast.WalkContinue
- })
- }
- func convertTags(text string) (ret string) {
- if !util.MarkdownSettings.InlineTag {
- return text
- }
- pos, i := -1, 0
- tokens := []byte(text)
- for ; i < len(tokens); i++ {
- if '#' == tokens[i] && (0 == i || ' ' == tokens[i-1] || (-1 < pos && '#' == tokens[pos])) {
- if i < len(tokens)-1 && '#' == tokens[i+1] {
- pos = -1
- continue
- }
- pos = i
- continue
- }
- if -1 < pos && ' ' == tokens[i] {
- tokens = append(tokens, 0)
- copy(tokens[i+1:], tokens[i:])
- tokens[i] = '#'
- pos = -1
- i++
- }
- }
- if -1 < pos && pos < i {
- tokens = append(tokens, '#')
- }
- return string(tokens)
- }
- // buildBlockRefInText converts plain text nodes into structured inline nodes.
- func buildBlockRefInText() {
- lute := NewLute()
- lute.SetHTMLTag2TextMark(true)
- for _, tree := range importTrees {
- tree.MergeText()
- var unlinkTextNodes []*ast.Node
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || ast.NodeText != n.Type {
- return ast.WalkContinue
- }
- if nil == n.Tokens {
- return ast.WalkContinue
- }
- t := parse.Inline("", n.Tokens, lute.ParseOptions) // parse at the inline level
- parse.NestedInlines2FlattedSpans(t, false)
- var children []*ast.Node
- for c := t.Root.FirstChild.FirstChild; nil != c; c = c.Next {
- children = append(children, c)
- }
- for _, c := range children {
- n.InsertBefore(c)
- }
- unlinkTextNodes = append(unlinkTextNodes, n)
- return ast.WalkContinue
- })
- for _, node := range unlinkTextNodes {
- node.Unlink()
- }
- }
- }
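- // searchLinkID resolves a wiki link to a block ID: exact path match first, then a fallback on the base name.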
- func searchLinkID(link string) (id string) {
- id = searchLinks[link]
- if "" != id {
- return
- }
- baseName := path.Base(link)
- for searchLink, searchID := range searchLinks {
- if path.Base(searchLink) == baseName {
- return searchID
- }
- }
- return
- }