1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777 |
- // SiYuan - Refactor your thinking
- // Copyright (c) 2020-present, b3log.org
- //
- // This program is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Affero General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Affero General Public License for more details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with this program. If not, see <https://www.gnu.org/licenses/>.
- package model
- import (
- "bytes"
- "errors"
- "fmt"
- "math"
- "os"
- "path"
- "path/filepath"
- "regexp"
- "sort"
- "strconv"
- "strings"
- "sync"
- "time"
- "unicode/utf8"
- "github.com/88250/gulu"
- "github.com/88250/lute"
- "github.com/88250/lute/ast"
- "github.com/88250/lute/html"
- "github.com/88250/lute/lex"
- "github.com/88250/lute/parse"
- "github.com/88250/vitess-sqlparser/sqlparser"
- "github.com/jinzhu/copier"
- "github.com/siyuan-note/filelock"
- "github.com/siyuan-note/logging"
- "github.com/siyuan-note/siyuan/kernel/conf"
- "github.com/siyuan-note/siyuan/kernel/search"
- "github.com/siyuan-note/siyuan/kernel/sql"
- "github.com/siyuan-note/siyuan/kernel/task"
- "github.com/siyuan-note/siyuan/kernel/treenode"
- "github.com/siyuan-note/siyuan/kernel/util"
- "github.com/xrash/smetrics"
- )
- func ListInvalidBlockRefs(page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) {
- refBlockMap := map[string][]string{}
- blockMap := map[string]bool{}
- var invalidBlockIDs []string
- notebooks, err := ListNotebooks()
- if nil != err {
- return
- }
- luteEngine := util.NewLute()
- for _, notebook := range notebooks {
- pages := pagedPaths(filepath.Join(util.DataDir, notebook.ID), 32)
- for _, paths := range pages {
- var trees []*parse.Tree
- for _, localPath := range paths {
- tree, loadTreeErr := loadTree(localPath, luteEngine)
- if nil != loadTreeErr {
- continue
- }
- trees = append(trees, tree)
- }
- for _, tree := range trees {
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if entering {
- if n.IsBlock() {
- blockMap[n.ID] = true
- return ast.WalkContinue
- }
- if ast.NodeTextMark == n.Type {
- if n.IsTextMarkType("a") {
- if strings.HasPrefix(n.TextMarkAHref, "siyuan://blocks/") {
- defID := strings.TrimPrefix(n.TextMarkAHref, "siyuan://blocks/")
- if strings.Contains(defID, "?") {
- defID = strings.Split(defID, "?")[0]
- }
- refID := treenode.ParentBlock(n).ID
- if defIDs := refBlockMap[refID]; 1 > len(defIDs) {
- refBlockMap[refID] = []string{defID}
- } else {
- refBlockMap[refID] = append(defIDs, defID)
- }
- }
- } else if n.IsTextMarkType("block-ref") {
- defID := n.TextMarkBlockRefID
- refID := treenode.ParentBlock(n).ID
- if defIDs := refBlockMap[refID]; 1 > len(defIDs) {
- refBlockMap[refID] = []string{defID}
- } else {
- refBlockMap[refID] = append(defIDs, defID)
- }
- }
- }
- }
- return ast.WalkContinue
- })
- }
- }
- }
- invalidDefIDs := map[string]bool{}
- for _, refDefIDs := range refBlockMap {
- for _, defID := range refDefIDs {
- invalidDefIDs[defID] = true
- }
- }
- var toRemoves []string
- for defID, _ := range invalidDefIDs {
- if _, ok := blockMap[defID]; ok {
- toRemoves = append(toRemoves, defID)
- }
- }
- for _, toRemove := range toRemoves {
- delete(invalidDefIDs, toRemove)
- }
- toRemoves = nil
- for refID, defIDs := range refBlockMap {
- var tmp []string
- for _, defID := range defIDs {
- if _, ok := invalidDefIDs[defID]; !ok {
- tmp = append(tmp, defID)
- }
- }
- for _, toRemove := range tmp {
- defIDs = gulu.Str.RemoveElem(defIDs, toRemove)
- }
- if 1 > len(defIDs) {
- toRemoves = append(toRemoves, refID)
- }
- }
- for _, toRemove := range toRemoves {
- delete(refBlockMap, toRemove)
- }
- for refID, _ := range refBlockMap {
- invalidBlockIDs = append(invalidBlockIDs, refID)
- }
- invalidBlockIDs = gulu.Str.RemoveDuplicatedElem(invalidBlockIDs)
- sort.Strings(invalidBlockIDs)
- allInvalidBlockIDs := invalidBlockIDs
- start := (page - 1) * pageSize
- end := page * pageSize
- if end > len(invalidBlockIDs) {
- end = len(invalidBlockIDs)
- }
- invalidBlockIDs = invalidBlockIDs[start:end]
- sqlBlocks := sql.GetBlocks(invalidBlockIDs)
- var tmp []*sql.Block
- for _, sqlBlock := range sqlBlocks {
- if nil != sqlBlock {
- tmp = append(tmp, sqlBlock)
- }
- }
- sqlBlocks = tmp
- ret = fromSQLBlocks(&sqlBlocks, "", 36)
- if 1 > len(ret) {
- ret = []*Block{}
- }
- matchedBlockCount = len(allInvalidBlockIDs)
- rootCount := map[string]bool{}
- for _, id := range allInvalidBlockIDs {
- bt := treenode.GetBlockTree(id)
- if nil == bt {
- continue
- }
- rootCount[bt.RootID] = true
- }
- matchedRootCount = len(rootCount)
- pageCount = (matchedBlockCount + pageSize - 1) / pageSize
- return
- }
- type EmbedBlock struct {
- Block *Block `json:"block"`
- BlockPaths []*BlockPath `json:"blockPaths"`
- }
- func UpdateEmbedBlock(id, content string) (err error) {
- bt := treenode.GetBlockTree(id)
- if nil == bt {
- err = ErrBlockNotFound
- return
- }
- if treenode.TypeAbbr(ast.NodeBlockQueryEmbed.String()) != bt.Type {
- err = errors.New("not query embed block")
- return
- }
- embedBlock := &EmbedBlock{
- Block: &Block{
- Markdown: content,
- },
- }
- updateEmbedBlockContent(id, []*EmbedBlock{embedBlock})
- return
- }
- func GetEmbedBlock(embedBlockID string, includeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
- return getEmbedBlock(embedBlockID, includeIDs, headingMode, breadcrumb)
- }
- func getEmbedBlock(embedBlockID string, includeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
- stmt := "SELECT * FROM `blocks` WHERE `id` IN ('" + strings.Join(includeIDs, "','") + "')"
- sqlBlocks := sql.SelectBlocksRawStmtNoParse(stmt, 1024)
- // 根据 includeIDs 的顺序排序 Improve `//!js` query embed block result sorting https://github.com/siyuan-note/siyuan/issues/9977
- m := map[string]int{}
- for i, id := range includeIDs {
- m[id] = i
- }
- sort.Slice(sqlBlocks, func(i, j int) bool {
- return m[sqlBlocks[i].ID] < m[sqlBlocks[j].ID]
- })
- ret = buildEmbedBlock(embedBlockID, []string{}, headingMode, breadcrumb, sqlBlocks)
- return
- }
- func SearchEmbedBlock(embedBlockID, stmt string, excludeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
- return searchEmbedBlock(embedBlockID, stmt, excludeIDs, headingMode, breadcrumb)
- }
- func searchEmbedBlock(embedBlockID, stmt string, excludeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
- sqlBlocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
- ret = buildEmbedBlock(embedBlockID, excludeIDs, headingMode, breadcrumb, sqlBlocks)
- return
- }
- func buildEmbedBlock(embedBlockID string, excludeIDs []string, headingMode int, breadcrumb bool, sqlBlocks []*sql.Block) (ret []*EmbedBlock) {
- var tmp []*sql.Block
- for _, b := range sqlBlocks {
- if "query_embed" == b.Type { // 嵌入块不再嵌入
- // 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
- // 这里会导致上面的 limit 限制不准确,导致结果变少,暂时没有解决方案,只能靠用户自己调整 SQL,加上 type != 'query_embed' 的条件
- continue
- }
- if !gulu.Str.Contains(b.ID, excludeIDs) {
- tmp = append(tmp, b)
- }
- }
- sqlBlocks = tmp
- // 缓存最多 128 棵语法树
- trees := map[string]*parse.Tree{}
- count := 0
- for _, sb := range sqlBlocks {
- if nil == trees[sb.RootID] {
- tree, _ := LoadTreeByBlockID(sb.RootID)
- if nil == tree {
- continue
- }
- trees[sb.RootID] = tree
- count++
- }
- if 127 < count {
- break
- }
- }
- for _, sb := range sqlBlocks {
- block, blockPaths := getEmbeddedBlock(trees, sb, headingMode, breadcrumb)
- if nil == block {
- continue
- }
- ret = append(ret, &EmbedBlock{
- Block: block,
- BlockPaths: blockPaths,
- })
- }
- // 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
- task.AppendTaskWithTimeout(task.DatabaseIndexEmbedBlock, 30*time.Second, updateEmbedBlockContent, embedBlockID, ret)
- // 添加笔记本名称
- var boxIDs []string
- for _, embedBlock := range ret {
- boxIDs = append(boxIDs, embedBlock.Block.Box)
- }
- boxIDs = gulu.Str.RemoveDuplicatedElem(boxIDs)
- boxNames := Conf.BoxNames(boxIDs)
- for _, embedBlock := range ret {
- name := boxNames[embedBlock.Block.Box]
- embedBlock.Block.HPath = name + embedBlock.Block.HPath
- }
- if 1 > len(ret) {
- ret = []*EmbedBlock{}
- }
- return
- }
- func SearchRefBlock(id, rootID, keyword string, beforeLen int, isSquareBrackets, isDatabase bool) (ret []*Block, newDoc bool) {
- cachedTrees := map[string]*parse.Tree{}
- onlyDoc := false
- if isSquareBrackets {
- onlyDoc = Conf.Editor.OnlySearchForDoc
- }
- if "" == keyword {
- // 查询为空时默认的块引排序规则按最近使用优先 https://github.com/siyuan-note/siyuan/issues/3218
- refs := sql.QueryRefsRecent(onlyDoc)
- for _, ref := range refs {
- tree := cachedTrees[ref.DefBlockRootID]
- if nil == tree {
- tree, _ = LoadTreeByBlockID(ref.DefBlockRootID)
- }
- if nil == tree {
- continue
- }
- cachedTrees[ref.RootID] = tree
- node := treenode.GetNodeInTree(tree, ref.DefBlockID)
- if nil == node {
- continue
- }
- sqlBlock := sql.BuildBlockFromNode(node, tree)
- if nil == sqlBlock {
- return
- }
- block := fromSQLBlock(sqlBlock, "", 0)
- block.RefText = getNodeRefText(node)
- block.RefText = maxContent(block.RefText, Conf.Editor.BlockRefDynamicAnchorTextMaxLen)
- ret = append(ret, block)
- }
- if 1 > len(ret) {
- ret = []*Block{}
- }
- // 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
- prependNotebookNameInHPath(ret)
- return
- }
- ret = fullTextSearchRefBlock(keyword, beforeLen, onlyDoc)
- tmp := ret[:0]
- for _, b := range ret {
- tree := cachedTrees[b.RootID]
- if nil == tree {
- tree, _ = LoadTreeByBlockID(b.RootID)
- }
- if nil == tree {
- continue
- }
- cachedTrees[b.RootID] = tree
- b.RefText = getBlockRefText(b.ID, tree)
- hitFirstChildID := false
- if b.IsContainerBlock() && "NodeDocument" != b.Type {
- // `((` 引用候选中排除当前块的父块 https://github.com/siyuan-note/siyuan/issues/4538
- tree := cachedTrees[b.RootID]
- if nil == tree {
- tree, _ = LoadTreeByBlockID(b.RootID)
- cachedTrees[b.RootID] = tree
- }
- if nil != tree {
- bNode := treenode.GetNodeInTree(tree, b.ID)
- if fc := treenode.FirstLeafBlock(bNode); nil != fc && fc.ID == id {
- hitFirstChildID = true
- }
- }
- }
- if "NodeAttributeView" == b.Type {
- // 数据库块可以添加到自身数据库块中,当前文档也可以添加到自身数据库块中
- tmp = append(tmp, b)
- } else {
- // 排除自身块、父块和根块
- if b.ID != id && !hitFirstChildID && b.ID != rootID {
- tmp = append(tmp, b)
- }
- }
- }
- ret = tmp
- if !isDatabase {
- // 如果非数据库中搜索块引,则不允许新建重名文档
- // 如果是数据库中搜索绑定块,则允许新建重名文档 https://github.com/siyuan-note/siyuan/issues/11713
- if block := treenode.GetBlockTree(id); nil != block {
- p := path.Join(block.HPath, keyword)
- newDoc = nil == treenode.GetBlockTreeRootByHPath(block.BoxID, p)
- }
- }
- // 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
- prependNotebookNameInHPath(ret)
- return
- }
- func prependNotebookNameInHPath(blocks []*Block) {
- var boxIDs []string
- for _, b := range blocks {
- boxIDs = append(boxIDs, b.Box)
- }
- boxIDs = gulu.Str.RemoveDuplicatedElem(boxIDs)
- boxNames := Conf.BoxNames(boxIDs)
- for _, b := range blocks {
- name := boxNames[b.Box]
- b.HPath = util.EscapeHTML(name) + b.HPath
- }
- }
- func FindReplace(keyword, replacement string, replaceTypes map[string]bool, ids []string, paths, boxes []string, types map[string]bool, method, orderBy, groupBy int) (err error) {
- // method:0:文本,1:查询语法,2:SQL,3:正则表达式
- if 1 == method || 2 == method {
- err = errors.New(Conf.Language(132))
- return
- }
- if 0 != groupBy {
- // 按文档分组后不支持替换 Need to be reminded that replacement operations are not supported after grouping by doc https://github.com/siyuan-note/siyuan/issues/10161
- // 因为分组条件传入以后搜索只能命中文档块,会导致 全部替换 失效
- err = errors.New(Conf.Language(221))
- return
- }
- // No longer trim spaces for the keyword and replacement https://github.com/siyuan-note/siyuan/issues/9229
- if keyword == replacement {
- return
- }
- r, _ := regexp.Compile(keyword)
- escapedKey := util.EscapeHTML(keyword)
- escapedR, _ := regexp.Compile(escapedKey)
- ids = gulu.Str.RemoveDuplicatedElem(ids)
- var renameRoots []*ast.Node
- renameRootTitles := map[string]string{}
- cachedTrees := map[string]*parse.Tree{}
- historyDir, err := getHistoryDir(HistoryOpReplace, time.Now())
- if nil != err {
- logging.LogErrorf("get history dir failed: %s", err)
- return
- }
- if 1 > len(ids) {
- // `Replace All` is no longer affected by pagination https://github.com/siyuan-note/siyuan/issues/8265
- blocks, _, _, _ := FullTextSearchBlock(keyword, boxes, paths, types, method, orderBy, groupBy, 1, math.MaxInt)
- for _, block := range blocks {
- ids = append(ids, block.ID)
- }
- }
- for _, id := range ids {
- bt := treenode.GetBlockTree(id)
- if nil == bt {
- continue
- }
- tree := cachedTrees[bt.RootID]
- if nil != tree {
- continue
- }
- tree, _ = LoadTreeByBlockID(id)
- if nil == tree {
- continue
- }
- historyPath := filepath.Join(historyDir, tree.Box, tree.Path)
- if err = os.MkdirAll(filepath.Dir(historyPath), 0755); nil != err {
- logging.LogErrorf("generate history failed: %s", err)
- return
- }
- var data []byte
- if data, err = filelock.ReadFile(filepath.Join(util.DataDir, tree.Box, tree.Path)); err != nil {
- logging.LogErrorf("generate history failed: %s", err)
- return
- }
- if err = gulu.File.WriteFileSafer(historyPath, data, 0644); err != nil {
- logging.LogErrorf("generate history failed: %s", err)
- return
- }
- cachedTrees[bt.RootID] = tree
- }
- indexHistoryDir(filepath.Base(historyDir), util.NewLute())
- for i, id := range ids {
- bt := treenode.GetBlockTree(id)
- if nil == bt {
- continue
- }
- tree := cachedTrees[bt.RootID]
- if nil == tree {
- continue
- }
- node := treenode.GetNodeInTree(tree, id)
- if nil == node {
- continue
- }
- if ast.NodeDocument == node.Type {
- if !replaceTypes["docTitle"] {
- continue
- }
- title := node.IALAttr("title")
- if 0 == method {
- if strings.Contains(title, keyword) {
- docTitleReplacement := strings.ReplaceAll(replacement, "/", "")
- renameRootTitles[node.ID] = strings.ReplaceAll(title, keyword, docTitleReplacement)
- renameRoots = append(renameRoots, node)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(title) {
- docTitleReplacement := strings.ReplaceAll(replacement, "/", "")
- renameRootTitles[node.ID] = r.ReplaceAllString(title, docTitleReplacement)
- renameRoots = append(renameRoots, node)
- }
- }
- } else {
- luteEngine := util.NewLute()
- var unlinks []*ast.Node
- ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering {
- return ast.WalkContinue
- }
- switch n.Type {
- case ast.NodeText:
- if !replaceTypes["text"] {
- return ast.WalkContinue
- }
- if replaceTextNode(n, method, keyword, replacement, r, luteEngine) {
- unlinks = append(unlinks, n)
- }
- case ast.NodeLinkDest:
- if !replaceTypes["imgSrc"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeLinkText:
- if !replaceTypes["imgText"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeLinkTitle:
- if !replaceTypes["imgTitle"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeCodeBlockCode:
- if !replaceTypes["codeBlock"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeMathBlockContent:
- if !replaceTypes["mathBlock"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeHTMLBlock:
- if !replaceTypes["htmlBlock"] {
- return ast.WalkContinue
- }
- replaceNodeTokens(n, method, keyword, replacement, r)
- case ast.NodeTextMark:
- if n.IsTextMarkType("code") {
- if !replaceTypes["code"] {
- return ast.WalkContinue
- }
- if 0 == method {
- if strings.Contains(n.TextMarkTextContent, escapedKey) {
- n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, escapedKey, replacement)
- }
- } else if 3 == method {
- if nil != escapedR && escapedR.MatchString(n.TextMarkTextContent) {
- n.TextMarkTextContent = escapedR.ReplaceAllString(n.TextMarkTextContent, replacement)
- }
- }
- } else if n.IsTextMarkType("a") {
- if replaceTypes["aText"] {
- if 0 == method {
- if strings.Contains(n.TextMarkTextContent, keyword) {
- n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkTextContent) {
- n.TextMarkTextContent = r.ReplaceAllString(n.TextMarkTextContent, replacement)
- }
- }
- }
- if replaceTypes["aTitle"] {
- if 0 == method {
- if strings.Contains(n.TextMarkATitle, keyword) {
- n.TextMarkATitle = strings.ReplaceAll(n.TextMarkATitle, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkATitle) {
- n.TextMarkATitle = r.ReplaceAllString(n.TextMarkATitle, replacement)
- }
- }
- }
- if replaceTypes["aHref"] {
- if 0 == method {
- if strings.Contains(n.TextMarkAHref, keyword) {
- n.TextMarkAHref = strings.ReplaceAll(n.TextMarkAHref, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkAHref) {
- n.TextMarkAHref = r.ReplaceAllString(n.TextMarkAHref, replacement)
- }
- }
- }
- } else if n.IsTextMarkType("em") {
- if !replaceTypes["em"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "em")
- } else if n.IsTextMarkType("strong") {
- if !replaceTypes["strong"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "strong")
- } else if n.IsTextMarkType("kbd") {
- if !replaceTypes["kbd"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "kbd")
- } else if n.IsTextMarkType("mark") {
- if !replaceTypes["mark"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "mark")
- } else if n.IsTextMarkType("s") {
- if !replaceTypes["s"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "s")
- } else if n.IsTextMarkType("sub") {
- if !replaceTypes["sub"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "sub")
- } else if n.IsTextMarkType("sup") {
- if !replaceTypes["sup"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "sup")
- } else if n.IsTextMarkType("tag") {
- if !replaceTypes["tag"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "tag")
- } else if n.IsTextMarkType("u") {
- if !replaceTypes["u"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "u")
- } else if n.IsTextMarkType("inline-math") {
- if !replaceTypes["inlineMath"] {
- return ast.WalkContinue
- }
- if 0 == method {
- if strings.Contains(n.TextMarkInlineMathContent, keyword) {
- n.TextMarkInlineMathContent = strings.ReplaceAll(n.TextMarkInlineMathContent, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkInlineMathContent) {
- n.TextMarkInlineMathContent = r.ReplaceAllString(n.TextMarkInlineMathContent, replacement)
- }
- }
- } else if n.IsTextMarkType("inline-memo") {
- if !replaceTypes["inlineMemo"] {
- return ast.WalkContinue
- }
- if 0 == method {
- if strings.Contains(n.TextMarkInlineMemoContent, keyword) {
- n.TextMarkInlineMemoContent = strings.ReplaceAll(n.TextMarkInlineMemoContent, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkInlineMemoContent) {
- n.TextMarkInlineMemoContent = r.ReplaceAllString(n.TextMarkInlineMemoContent, replacement)
- }
- }
- } else if n.IsTextMarkType("text") {
- // Search and replace fails in some cases https://github.com/siyuan-note/siyuan/issues/10016
- if !replaceTypes["text"] {
- return ast.WalkContinue
- }
- replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "text")
- }
- }
- return ast.WalkContinue
- })
- for _, unlink := range unlinks {
- unlink.Unlink()
- }
- if err = writeTreeUpsertQueue(tree); nil != err {
- return
- }
- }
- util.PushEndlessProgress(fmt.Sprintf(Conf.Language(206), i+1, len(ids)))
- }
- for i, renameRoot := range renameRoots {
- newTitle := renameRootTitles[renameRoot.ID]
- RenameDoc(renameRoot.Box, renameRoot.Path, newTitle)
- util.PushEndlessProgress(fmt.Sprintf(Conf.Language(207), i+1, len(renameRoots)))
- }
- WaitForWritingFiles()
- if 0 < len(ids) {
- go func() {
- time.Sleep(time.Millisecond * 500)
- util.ReloadUI()
- }()
- }
- return
- }
- func replaceNodeTextMarkTextContent(n *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp, typ string) {
- if 0 == method {
- if "tag" == typ {
- keyword = strings.TrimPrefix(keyword, "#")
- keyword = strings.TrimSuffix(keyword, "#")
- }
- if strings.Contains(n.TextMarkTextContent, keyword) {
- n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, keyword, replacement)
- }
- } else if 3 == method {
- if nil != r && r.MatchString(n.TextMarkTextContent) {
- n.TextMarkTextContent = r.ReplaceAllString(n.TextMarkTextContent, replacement)
- }
- }
- }
- // replaceTextNode 替换文本节点为其他节点。
- // Supports replacing text elements with other elements https://github.com/siyuan-note/siyuan/issues/11058
- func replaceTextNode(text *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp, luteEngine *lute.Lute) bool {
- if 0 == method {
- if bytes.Contains(text.Tokens, []byte(keyword)) {
- newContent := bytes.ReplaceAll(text.Tokens, []byte(keyword), []byte(replacement))
- tree := parse.Inline("", newContent, luteEngine.ParseOptions)
- if nil == tree.Root.FirstChild {
- return false
- }
- parse.NestedInlines2FlattedSpans(tree, false)
- var replaceNodes []*ast.Node
- for rNode := tree.Root.FirstChild.FirstChild; nil != rNode; rNode = rNode.Next {
- replaceNodes = append(replaceNodes, rNode)
- }
- for _, rNode := range replaceNodes {
- text.InsertBefore(rNode)
- }
- return true
- }
- } else if 3 == method {
- if nil != r && r.MatchString(string(text.Tokens)) {
- newContent := []byte(r.ReplaceAllString(string(text.Tokens), replacement))
- tree := parse.Inline("", newContent, luteEngine.ParseOptions)
- if nil == tree.Root.FirstChild {
- return false
- }
- var replaceNodes []*ast.Node
- for rNode := tree.Root.FirstChild.FirstChild; nil != rNode; rNode = rNode.Next {
- replaceNodes = append(replaceNodes, rNode)
- }
- for _, rNode := range replaceNodes {
- text.InsertBefore(rNode)
- }
- return true
- }
- }
- return false
- }
- func replaceNodeTokens(n *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp) {
- if 0 == method {
- if bytes.Contains(n.Tokens, []byte(keyword)) {
- n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(keyword), []byte(replacement))
- }
- } else if 3 == method {
- if nil != r && r.MatchString(string(n.Tokens)) {
- n.Tokens = []byte(r.ReplaceAllString(string(n.Tokens), replacement))
- }
- }
- }
- // FullTextSearchBlock 搜索内容块。
- //
- // method:0:关键字,1:查询语法,2:SQL,3:正则表达式
- // orderBy: 0:按块类型(默认),1:按创建时间升序,2:按创建时间降序,3:按更新时间升序,4:按更新时间降序,5:按内容顺序(仅在按文档分组时),6:按相关度升序,7:按相关度降序
- // groupBy:0:不分组,1:按文档分组
- func FullTextSearchBlock(query string, boxes, paths []string, types map[string]bool, method, orderBy, groupBy, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) {
- ret = []*Block{}
- if "" == query {
- return
- }
- trimQuery := strings.TrimSpace(query)
- if "" != trimQuery {
- query = trimQuery
- }
- beforeLen := 36
- var blocks []*Block
- orderByClause := buildOrderBy(query, method, orderBy)
- switch method {
- case 1: // 查询语法
- filter := buildTypeFilter(types)
- boxFilter := buildBoxesFilter(boxes)
- pathFilter := buildPathsFilter(paths)
- blocks, matchedBlockCount, matchedRootCount = fullTextSearchByQuerySyntax(query, boxFilter, pathFilter, filter, orderByClause, beforeLen, page, pageSize)
- case 2: // SQL
- blocks, matchedBlockCount, matchedRootCount = searchBySQL(query, beforeLen, page, pageSize)
- case 3: // 正则表达式
- typeFilter := buildTypeFilter(types)
- boxFilter := buildBoxesFilter(boxes)
- pathFilter := buildPathsFilter(paths)
- blocks, matchedBlockCount, matchedRootCount = fullTextSearchByRegexp(query, boxFilter, pathFilter, typeFilter, orderByClause, beforeLen, page, pageSize)
- default: // 关键字
- filter := buildTypeFilter(types)
- boxFilter := buildBoxesFilter(boxes)
- pathFilter := buildPathsFilter(paths)
- blocks, matchedBlockCount, matchedRootCount = fullTextSearchByKeyword(query, boxFilter, pathFilter, filter, orderByClause, beforeLen, page, pageSize)
- }
- pageCount = (matchedBlockCount + pageSize - 1) / pageSize
- switch groupBy {
- case 0: // 不分组
- ret = blocks
- case 1: // 按文档分组
- rootMap := map[string]bool{}
- var rootIDs []string
- contentSorts := map[string]int{}
- for _, b := range blocks {
- if _, ok := rootMap[b.RootID]; !ok {
- rootMap[b.RootID] = true
- rootIDs = append(rootIDs, b.RootID)
- tree, _ := LoadTreeByBlockID(b.RootID)
- if nil == tree {
- continue
- }
- if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
- sort := 0
- ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
- if !entering || !n.IsBlock() {
- return ast.WalkContinue
- }
- contentSorts[n.ID] = sort
- sort++
- return ast.WalkContinue
- })
- }
- }
- }
- sqlRoots := sql.GetBlocks(rootIDs)
- roots := fromSQLBlocks(&sqlRoots, "", beforeLen)
- for _, root := range roots {
- for _, b := range blocks {
- if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
- b.Sort = contentSorts[b.ID]
- }
- if b.RootID == root.ID {
- root.Children = append(root.Children, b)
- }
- }
- switch orderBy {
- case 1: //按创建时间升序
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Created < root.Children[j].Created })
- case 2: // 按创建时间降序
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Created > root.Children[j].Created })
- case 3: // 按更新时间升序
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Updated < root.Children[j].Updated })
- case 4: // 按更新时间降序
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Updated > root.Children[j].Updated })
- case 5: // 按内容顺序(仅在按文档分组时)
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Sort < root.Children[j].Sort })
- default: // 按块类型(默认)
- sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Sort < root.Children[j].Sort })
- }
- }
- switch orderBy {
- case 1: //按创建时间升序
- sort.Slice(roots, func(i, j int) bool { return roots[i].Created < roots[j].Created })
- case 2: // 按创建时间降序
- sort.Slice(roots, func(i, j int) bool { return roots[i].Created > roots[j].Created })
- case 3: // 按更新时间升序
- sort.Slice(roots, func(i, j int) bool { return roots[i].Updated < roots[j].Updated })
- case 4: // 按更新时间降序
- sort.Slice(roots, func(i, j int) bool { return roots[i].Updated > roots[j].Updated })
- case 5: // 按内容顺序(仅在按文档分组时)
- // 都是文档,不需要再次排序
- case 6, 7: // 按相关度
- // 已在 ORDER BY 中处理
- default: // 按块类型(默认)
- // 都是文档,不需要再次排序
- }
- ret = roots
- default:
- ret = blocks
- }
- if 1 > len(ret) {
- ret = []*Block{}
- }
- return
- }
- func buildBoxesFilter(boxes []string) string {
- if 0 == len(boxes) {
- return ""
- }
- builder := bytes.Buffer{}
- builder.WriteString(" AND (")
- for i, box := range boxes {
- builder.WriteString(fmt.Sprintf("box = '%s'", box))
- if i < len(boxes)-1 {
- builder.WriteString(" OR ")
- }
- }
- builder.WriteString(")")
- return builder.String()
- }
- func buildPathsFilter(paths []string) string {
- if 0 == len(paths) {
- return ""
- }
- builder := bytes.Buffer{}
- builder.WriteString(" AND (")
- for i, path := range paths {
- builder.WriteString(fmt.Sprintf("path LIKE '%s%%'", path))
- if i < len(paths)-1 {
- builder.WriteString(" OR ")
- }
- }
- builder.WriteString(")")
- return builder.String()
- }
- func buildOrderBy(query string, method, orderBy int) string {
- switch orderBy {
- case 1:
- return "ORDER BY created ASC"
- case 2:
- return "ORDER BY created DESC"
- case 3:
- return "ORDER BY updated ASC"
- case 4:
- return "ORDER BY updated DESC"
- case 6:
- if 0 != method && 1 != method {
- // 只有关键字搜索和查询语法搜索才支持按相关度升序 https://github.com/siyuan-note/siyuan/issues/7861
- return "ORDER BY sort DESC, updated DESC"
- }
- return "ORDER BY rank DESC" // 默认是按相关度降序,所以按相关度升序要反过来使用 DESC
- case 7:
- if 0 != method && 1 != method {
- return "ORDER BY sort ASC, updated DESC"
- }
- return "ORDER BY rank" // 默认是按相关度降序
- default:
- clause := "ORDER BY CASE " +
- "WHEN name = '${keyword}' THEN 10 " +
- "WHEN alias = '${keyword}' THEN 20 " +
- "WHEN name LIKE '%${keyword}%' THEN 50 " +
- "WHEN alias LIKE '%${keyword}%' THEN 60 " +
- "ELSE 65535 END ASC, sort ASC, updated DESC"
- clause = strings.ReplaceAll(clause, "${keyword}", strings.ReplaceAll(query, "'", "''"))
- return clause
- }
- }
- func buildTypeFilter(types map[string]bool) string {
- s := conf.NewSearch()
- if err := copier.Copy(s, Conf.Search); nil != err {
- logging.LogErrorf("copy search conf failed: %s", err)
- }
- if nil != types {
- s.Document = types["document"]
- s.Heading = types["heading"]
- s.List = types["list"]
- s.ListItem = types["listItem"]
- s.CodeBlock = types["codeBlock"]
- s.MathBlock = types["mathBlock"]
- s.Table = types["table"]
- s.Blockquote = types["blockquote"]
- s.SuperBlock = types["superBlock"]
- s.Paragraph = types["paragraph"]
- s.HTMLBlock = types["htmlBlock"]
- s.EmbedBlock = types["embedBlock"]
- s.DatabaseBlock = types["databaseBlock"]
- s.AudioBlock = types["audioBlock"]
- s.VideoBlock = types["videoBlock"]
- s.IFrameBlock = types["iframeBlock"]
- s.WidgetBlock = types["widgetBlock"]
- } else {
- s.Document = Conf.Search.Document
- s.Heading = Conf.Search.Heading
- s.List = Conf.Search.List
- s.ListItem = Conf.Search.ListItem
- s.CodeBlock = Conf.Search.CodeBlock
- s.MathBlock = Conf.Search.MathBlock
- s.Table = Conf.Search.Table
- s.Blockquote = Conf.Search.Blockquote
- s.SuperBlock = Conf.Search.SuperBlock
- s.Paragraph = Conf.Search.Paragraph
- s.HTMLBlock = Conf.Search.HTMLBlock
- s.EmbedBlock = Conf.Search.EmbedBlock
- s.DatabaseBlock = Conf.Search.DatabaseBlock
- s.AudioBlock = Conf.Search.AudioBlock
- s.VideoBlock = Conf.Search.VideoBlock
- s.IFrameBlock = Conf.Search.IFrameBlock
- s.WidgetBlock = Conf.Search.WidgetBlock
- }
- return s.TypeFilter()
- }
- func searchBySQL(stmt string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
- stmt = filterQueryInvisibleChars(stmt)
- stmt = strings.TrimSpace(stmt)
- blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
- ret = fromSQLBlocks(&blocks, "", beforeLen)
- if 1 > len(ret) {
- ret = []*Block{}
- return
- }
- stmt = strings.ToLower(stmt)
- if strings.HasPrefix(stmt, "select a.* ") { // 多个搜索关键字匹配文档 https://github.com/siyuan-note/siyuan/issues/7350
- stmt = strings.ReplaceAll(stmt, "select a.* ", "select COUNT(a.id) AS `matches`, COUNT(DISTINCT(a.root_id)) AS `docs` ")
- } else {
- stmt = strings.ReplaceAll(stmt, "select * ", "select COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` ")
- }
- stmt = removeLimitClause(stmt)
- result, _ := sql.QueryNoLimit(stmt)
- if 1 > len(ret) {
- return
- }
- matchedBlockCount = int(result[0]["matches"].(int64))
- matchedRootCount = int(result[0]["docs"].(int64))
- return
- }
- func removeLimitClause(stmt string) string {
- parsedStmt, err := sqlparser.Parse(stmt)
- if nil != err {
- return stmt
- }
- switch parsedStmt.(type) {
- case *sqlparser.Select:
- slct := parsedStmt.(*sqlparser.Select)
- if nil != slct.Limit {
- slct.Limit = nil
- }
- stmt = sqlparser.String(slct)
- }
- return stmt
- }
- func fullTextSearchRefBlock(keyword string, beforeLen int, onlyDoc bool) (ret []*Block) {
- keyword = filterQueryInvisibleChars(keyword)
- if id := extractID(keyword); "" != id {
- ret, _, _ = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+id+"'", 36, 1, 32)
- return
- }
- quotedKeyword := stringQuery(keyword)
- table := "blocks_fts" // 大小写敏感
- if !Conf.Search.CaseSensitive {
- table = "blocks_fts_case_insensitive"
- }
- projections := "id, parent_id, root_id, hash, box, path, " +
- "snippet(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS hpath, " +
- "snippet(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS name, " +
- "snippet(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS alias, " +
- "snippet(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS memo, " +
- "tag, " +
- "snippet(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS content, " +
- "fcontent, markdown, length, type, subtype, ial, sort, created, updated"
- stmt := "SELECT " + projections + " FROM " + table + " WHERE " + table + " MATCH '" + columnFilter() + ":(" + quotedKeyword + ")' AND type"
- if onlyDoc {
- stmt += " = 'd'"
- } else {
- stmt += " IN " + Conf.Search.TypeFilter()
- }
- if ignoreLines := getRefSearchIgnoreLines(); 0 < len(ignoreLines) {
- // Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
- notLike := bytes.Buffer{}
- for _, line := range ignoreLines {
- notLike.WriteString(" AND ")
- notLike.WriteString(line)
- }
- stmt += notLike.String()
- }
- orderBy := ` ORDER BY CASE
- WHEN name = '${keyword}' THEN 10
- WHEN alias = '${keyword}' THEN 20
- WHEN memo = '${keyword}' THEN 30
- WHEN content = '${keyword}' and type = 'd' THEN 40
- WHEN content LIKE '%${keyword}%' and type = 'd' THEN 41
- WHEN name LIKE '%${keyword}%' THEN 50
- WHEN alias LIKE '%${keyword}%' THEN 60
- WHEN content = '${keyword}' and type = 'h' THEN 70
- WHEN content LIKE '%${keyword}%' and type = 'h' THEN 71
- WHEN fcontent = '${keyword}' and type = 'i' THEN 80
- WHEN fcontent LIKE '%${keyword}%' and type = 'i' THEN 81
- WHEN memo LIKE '%${keyword}%' THEN 90
- WHEN content LIKE '%${keyword}%' and type != 'i' and type != 'l' THEN 100
- ELSE 65535 END ASC, sort ASC, length ASC`
- orderBy = strings.ReplaceAll(orderBy, "${keyword}", strings.ReplaceAll(keyword, "'", "''"))
- stmt += orderBy + " LIMIT " + strconv.Itoa(Conf.Search.Limit)
- blocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
- ret = fromSQLBlocks(&blocks, "", beforeLen)
- if 1 > len(ret) {
- ret = []*Block{}
- }
- return
- }
- func extractID(content string) (ret string) {
- // Improve block ref search ID extraction https://github.com/siyuan-note/siyuan/issues/10848
- if 22 > len(content) {
- return
- }
- // 从第一个字符开始循环,直到找到一个合法的 ID 为止
- for i := 0; i < len(content)-21; i++ {
- if ast.IsNodeIDPattern(content[i : i+22]) {
- ret = content[i : i+22]
- return
- }
- }
- return
- }
- func fullTextSearchByQuerySyntax(query, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
- query = filterQueryInvisibleChars(query)
- if ast.IsNodeIDPattern(query) {
- ret, matchedBlockCount, matchedRootCount = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+query+"'", beforeLen, page, pageSize)
- return
- }
- return fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy, beforeLen, page, pageSize)
- }
- func fullTextSearchByKeyword(query, boxFilter, pathFilter, typeFilter string, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
- query = filterQueryInvisibleChars(query)
- if ast.IsNodeIDPattern(query) {
- ret, matchedBlockCount, matchedRootCount = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+query+"'", beforeLen, page, pageSize)
- return
- }
- query = stringQuery(query)
- return fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy, beforeLen, page, pageSize)
- }
- func fullTextSearchByRegexp(exp, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
- exp = filterQueryInvisibleChars(exp)
- fieldFilter := fieldRegexp(exp)
- stmt := "SELECT * FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
- stmt += boxFilter + pathFilter
- stmt += " " + orderBy
- stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize)
- blocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
- ret = fromSQLBlocks(&blocks, "", beforeLen)
- if 1 > len(ret) {
- ret = []*Block{}
- }
- matchedBlockCount, matchedRootCount = fullTextSearchCountByRegexp(exp, boxFilter, pathFilter, typeFilter)
- return
- }
- func fullTextSearchCountByRegexp(exp, boxFilter, pathFilter, typeFilter string) (matchedBlockCount, matchedRootCount int) {
- fieldFilter := fieldRegexp(exp)
- stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
- stmt += boxFilter + pathFilter
- result, _ := sql.QueryNoLimit(stmt)
- if 1 > len(result) {
- return
- }
- matchedBlockCount = int(result[0]["matches"].(int64))
- matchedRootCount = int(result[0]["docs"].(int64))
- return
- }
- func fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
- table := "blocks_fts" // 大小写敏感
- if !Conf.Search.CaseSensitive {
- table = "blocks_fts_case_insensitive"
- }
- projections := "id, parent_id, root_id, hash, box, path, " +
- // Search result content snippet returns more text https://github.com/siyuan-note/siyuan/issues/10707
- "snippet(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS hpath, " +
- "snippet(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS name, " +
- "snippet(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS alias, " +
- "snippet(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS memo, " +
- "tag, " +
- "snippet(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS content, " +
- "fcontent, markdown, length, type, subtype, ial, sort, created, updated"
- stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
- stmt += ") AND type IN " + typeFilter
- stmt += boxFilter + pathFilter
- if ignoreLines := getSearchIgnoreLines(); 0 < len(ignoreLines) {
- // Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
- notLike := bytes.Buffer{}
- for _, line := range ignoreLines {
- notLike.WriteString(" AND ")
- notLike.WriteString(line)
- }
- stmt += notLike.String()
- }
- stmt += " " + orderBy
- stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize)
- blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
- ret = fromSQLBlocks(&blocks, "", beforeLen)
- if 1 > len(ret) {
- ret = []*Block{}
- }
- matchedBlockCount, matchedRootCount = fullTextSearchCount(query, boxFilter, pathFilter, typeFilter)
- return
- }
- func highlightByQuery(query, typeFilter, id string) (ret []string) {
- const limit = 256
- table := "blocks_fts"
- if !Conf.Search.CaseSensitive {
- table = "blocks_fts_case_insensitive"
- }
- projections := "id, parent_id, root_id, hash, box, path, " +
- "highlight(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS hpath, " +
- "highlight(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS name, " +
- "highlight(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS alias, " +
- "highlight(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS memo, " +
- "tag, " +
- "highlight(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS content, " +
- "fcontent, markdown, length, type, subtype, ial, sort, created, updated"
- stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
- stmt += ") AND type IN " + typeFilter
- stmt += " AND root_id = '" + id + "'"
- stmt += " LIMIT " + strconv.Itoa(limit)
- sqlBlocks := sql.SelectBlocksRawStmt(stmt, 1, limit)
- for _, block := range sqlBlocks {
- keyword := gulu.Str.SubstringsBetween(block.Content, search.SearchMarkLeft, search.SearchMarkRight)
- if 0 < len(keyword) {
- ret = append(ret, keyword...)
- }
- }
- ret = gulu.Str.RemoveDuplicatedElem(ret)
- return
- }
- func fullTextSearchCount(query, boxFilter, pathFilter, typeFilter string) (matchedBlockCount, matchedRootCount int) {
- query = filterQueryInvisibleChars(query)
- if ast.IsNodeIDPattern(query) {
- ret, _ := sql.QueryNoLimit("SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `blocks` WHERE `id` = '" + query + "'")
- if 1 > len(ret) {
- return
- }
- matchedBlockCount = int(ret[0]["matches"].(int64))
- matchedRootCount = int(ret[0]["docs"].(int64))
- return
- }
- table := "blocks_fts" // 大小写敏感
- if !Conf.Search.CaseSensitive {
- table = "blocks_fts_case_insensitive"
- }
- stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `" + table + "` WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
- stmt += ") AND type IN " + typeFilter
- stmt += boxFilter + pathFilter
- result, _ := sql.QueryNoLimit(stmt)
- if 1 > len(result) {
- return
- }
- matchedBlockCount = int(result[0]["matches"].(int64))
- matchedRootCount = int(result[0]["docs"].(int64))
- return
- }
- func markSearch(text string, keyword string, beforeLen int) (marked string, score float64) {
- if 0 == len(keyword) {
- marked = text
- if strings.Contains(marked, search.SearchMarkLeft) { // 使用 FTS snippet() 处理过高亮片段,这里简单替换后就返回
- marked = util.EscapeHTML(text)
- marked = strings.ReplaceAll(marked, search.SearchMarkLeft, "<mark>")
- marked = strings.ReplaceAll(marked, search.SearchMarkRight, "</mark>")
- return
- }
- keywords := gulu.Str.SubstringsBetween(marked, search.SearchMarkLeft, search.SearchMarkRight)
- keywords = gulu.Str.RemoveDuplicatedElem(keywords)
- keyword = strings.Join(keywords, search.TermSep)
- marked = strings.ReplaceAll(marked, search.SearchMarkLeft, "")
- marked = strings.ReplaceAll(marked, search.SearchMarkRight, "")
- _, marked = search.MarkText(marked, keyword, beforeLen, Conf.Search.CaseSensitive)
- return
- }
- pos, marked := search.MarkText(text, keyword, beforeLen, Conf.Search.CaseSensitive)
- if -1 < pos {
- if 0 == pos {
- score = 1
- }
- score += float64(strings.Count(marked, "<mark>"))
- winkler := smetrics.JaroWinkler(text, keyword, 0.7, 4)
- score += winkler
- }
- score = -score // 分越小排序越靠前
- return
- }
- func fromSQLBlocks(sqlBlocks *[]*sql.Block, terms string, beforeLen int) (ret []*Block) {
- for _, sqlBlock := range *sqlBlocks {
- ret = append(ret, fromSQLBlock(sqlBlock, terms, beforeLen))
- }
- return
- }
- func fromSQLBlock(sqlBlock *sql.Block, terms string, beforeLen int) (block *Block) {
- if nil == sqlBlock {
- return
- }
- id := sqlBlock.ID
- content := sqlBlock.Content
- if 1 < strings.Count(content, search.SearchMarkRight) && strings.HasSuffix(content, search.SearchMarkRight+"...") {
- // 返回多个关键字命中时需要检查最后一个关键字是否被截断
- firstKeyword := gulu.Str.SubStringBetween(content, search.SearchMarkLeft, search.SearchMarkRight)
- lastKeyword := gulu.Str.LastSubStringBetween(content, search.SearchMarkLeft, search.SearchMarkRight)
- if firstKeyword != lastKeyword {
- // 如果第一个关键字和最后一个关键字不相同,说明最后一个关键字被截断了
- // 此时需要将 content 中的最后一个关键字替换为完整的关键字
- content = strings.TrimSuffix(content, search.SearchMarkLeft+lastKeyword+search.SearchMarkRight+"...")
- content += search.SearchMarkLeft + firstKeyword + search.SearchMarkRight + "..."
- }
- }
- content = util.EscapeHTML(content) // Search dialog XSS https://github.com/siyuan-note/siyuan/issues/8525
- content, _ = markSearch(content, terms, beforeLen)
- content = maxContent(content, 5120)
- markdown := maxContent(sqlBlock.Markdown, 5120)
- fContent := util.EscapeHTML(sqlBlock.FContent) // fContent 会用于和 content 对比,在反链计算时用于判断是否是列表项下第一个子块,所以也需要转义 https://github.com/siyuan-note/siyuan/issues/11001
- block = &Block{
- Box: sqlBlock.Box,
- Path: sqlBlock.Path,
- ID: id,
- RootID: sqlBlock.RootID,
- ParentID: sqlBlock.ParentID,
- Alias: sqlBlock.Alias,
- Name: sqlBlock.Name,
- Memo: sqlBlock.Memo,
- Tag: sqlBlock.Tag,
- Content: content,
- FContent: fContent,
- Markdown: markdown,
- Type: treenode.FromAbbrType(sqlBlock.Type),
- SubType: sqlBlock.SubType,
- Sort: sqlBlock.Sort,
- }
- if "" != sqlBlock.IAL {
- block.IAL = map[string]string{}
- ialStr := strings.TrimPrefix(sqlBlock.IAL, "{:")
- ialStr = strings.TrimSuffix(ialStr, "}")
- ial := parse.Tokens2IAL([]byte(ialStr))
- for _, kv := range ial {
- block.IAL[kv[0]] = kv[1]
- }
- }
- hPath, _ := markSearch(sqlBlock.HPath, terms, 18)
- if !strings.HasPrefix(hPath, "/") {
- hPath = "/" + hPath
- }
- block.HPath = hPath
- if "" != block.Name {
- block.Name, _ = markSearch(block.Name, terms, 256)
- }
- if "" != block.Alias {
- block.Alias, _ = markSearch(block.Alias, terms, 256)
- }
- if "" != block.Memo {
- block.Memo, _ = markSearch(block.Memo, terms, 256)
- }
- return
- }
- func maxContent(content string, maxLen int) string {
- idx := strings.Index(content, "<mark>")
- if 128 < maxLen && maxLen <= idx {
- head := bytes.Buffer{}
- for i := 0; i < 512; i++ {
- r, size := utf8.DecodeLastRuneInString(content[:idx])
- head.WriteRune(r)
- idx -= size
- if 64 < head.Len() {
- break
- }
- }
- content = util.Reverse(head.String()) + content[idx:]
- }
- if maxLen < utf8.RuneCountInString(content) {
- return gulu.Str.SubStr(content, maxLen) + "..."
- }
- return content
- }
- func fieldRegexp(regexp string) string {
- buf := bytes.Buffer{}
- buf.WriteString("(")
- buf.WriteString("content REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("'")
- if Conf.Search.Name {
- buf.WriteString(" OR name REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("'")
- }
- if Conf.Search.Alias {
- buf.WriteString(" OR alias REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("'")
- }
- if Conf.Search.Memo {
- buf.WriteString(" OR memo REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("'")
- }
- if Conf.Search.IAL {
- buf.WriteString(" OR ial REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("'")
- }
- buf.WriteString(" OR tag REGEXP '")
- buf.WriteString(regexp)
- buf.WriteString("')")
- return buf.String()
- }
- func columnFilter() string {
- buf := bytes.Buffer{}
- buf.WriteString("{content")
- if Conf.Search.Name {
- buf.WriteString(" name")
- }
- if Conf.Search.Alias {
- buf.WriteString(" alias")
- }
- if Conf.Search.Memo {
- buf.WriteString(" memo")
- }
- if Conf.Search.IAL {
- buf.WriteString(" ial")
- }
- buf.WriteString(" tag}")
- return buf.String()
- }
- func stringQuery(query string) string {
- if "" == strings.TrimSpace(query) {
- return "\"" + query + "\""
- }
- query = strings.ReplaceAll(query, "\"", "\"\"")
- query = strings.ReplaceAll(query, "'", "''")
- buf := bytes.Buffer{}
- parts := strings.Split(query, " ")
- for _, part := range parts {
- part = strings.TrimSpace(part)
- part = "\"" + part + "\""
- buf.WriteString(part)
- buf.WriteString(" ")
- }
- return strings.TrimSpace(buf.String())
- }
- // markReplaceSpan 用于处理搜索高亮。
- func markReplaceSpan(n *ast.Node, unlinks *[]*ast.Node, keywords []string, markSpanDataType string, luteEngine *lute.Lute) bool {
- if ast.NodeText == n.Type {
- text := n.Content()
- escapedText := util.EscapeHTML(text)
- escapedKeywords := make([]string, len(keywords))
- for i, keyword := range keywords {
- escapedKeywords[i] = util.EscapeHTML(keyword)
- }
- hText := search.EncloseHighlighting(escapedText, escapedKeywords, search.GetMarkSpanStart(markSpanDataType), search.GetMarkSpanEnd(), Conf.Search.CaseSensitive, false)
- if hText != escapedText {
- text = hText
- }
- n.Tokens = gulu.Str.ToBytes(text)
- if bytes.Contains(n.Tokens, []byte(search.MarkDataType)) {
- linkTree := parse.Inline("", n.Tokens, luteEngine.ParseOptions)
- var children []*ast.Node
- for c := linkTree.Root.FirstChild.FirstChild; nil != c; c = c.Next {
- children = append(children, c)
- }
- for _, c := range children {
- n.InsertBefore(c)
- }
- *unlinks = append(*unlinks, n)
- return true
- }
- } else if ast.NodeTextMark == n.Type {
- // 搜索结果高亮支持大部分行级元素 https://github.com/siyuan-note/siyuan/issues/6745
- if n.IsTextMarkType("inline-math") || n.IsTextMarkType("inline-memo") {
- return false
- }
- var text string
- if n.IsTextMarkType("code") {
- // code 在前面的 n.
- for i, k := range keywords {
- keywords[i] = html.EscapeString(k)
- }
- text = n.TextMarkTextContent
- } else {
- text = n.Content()
- }
- startTag := search.GetMarkSpanStart(markSpanDataType)
- text = search.EncloseHighlighting(text, keywords, startTag, search.GetMarkSpanEnd(), Conf.Search.CaseSensitive, false)
- if strings.Contains(text, search.MarkDataType) {
- dataType := search.GetMarkSpanStart(n.TextMarkType + " " + search.MarkDataType)
- text = strings.ReplaceAll(text, startTag, dataType)
- tokens := gulu.Str.ToBytes(text)
- linkTree := parse.Inline("", tokens, luteEngine.ParseOptions)
- var children []*ast.Node
- for c := linkTree.Root.FirstChild.FirstChild; nil != c; c = c.Next {
- if ast.NodeText == c.Type {
- c.Type = ast.NodeTextMark
- c.TextMarkType = n.TextMarkType
- c.TextMarkTextContent = string(c.Tokens)
- if n.IsTextMarkType("a") {
- c.TextMarkAHref, c.TextMarkATitle = n.TextMarkAHref, n.TextMarkATitle
- } else if treenode.IsBlockRef(n) {
- c.TextMarkBlockRefID = n.TextMarkBlockRefID
- c.TextMarkBlockRefSubtype = n.TextMarkBlockRefSubtype
- } else if treenode.IsFileAnnotationRef(n) {
- c.TextMarkFileAnnotationRefID = n.TextMarkFileAnnotationRefID
- }
- } else if ast.NodeTextMark == c.Type {
- if n.IsTextMarkType("a") {
- c.TextMarkAHref, c.TextMarkATitle = n.TextMarkAHref, n.TextMarkATitle
- } else if treenode.IsBlockRef(n) {
- c.TextMarkBlockRefID = n.TextMarkBlockRefID
- c.TextMarkBlockRefSubtype = n.TextMarkBlockRefSubtype
- } else if treenode.IsFileAnnotationRef(n) {
- c.TextMarkFileAnnotationRefID = n.TextMarkFileAnnotationRefID
- }
- }
- children = append(children, c)
- if nil != n.Next && ast.NodeKramdownSpanIAL == n.Next.Type {
- c.KramdownIAL = n.KramdownIAL
- ial := &ast.Node{Type: ast.NodeKramdownSpanIAL, Tokens: n.Next.Tokens}
- children = append(children, ial)
- }
- }
- for _, c := range children {
- n.InsertBefore(c)
- }
- *unlinks = append(*unlinks, n)
- return true
- }
- }
- return false
- }
- // markReplaceSpanWithSplit 用于处理虚拟引用和反链提及高亮。
- func markReplaceSpanWithSplit(text string, keywords []string, replacementStart, replacementEnd string) (ret string) {
- // 虚拟引用和反链提及关键字按最长匹配优先 https://github.com/siyuan-note/siyuan/issues/7465
- sort.Slice(keywords, func(i, j int) bool { return len(keywords[i]) > len(keywords[j]) })
- tmp := search.EncloseHighlighting(text, keywords, replacementStart, replacementEnd, Conf.Search.CaseSensitive, true)
- parts := strings.Split(tmp, replacementEnd)
- buf := bytes.Buffer{}
- for i := 0; i < len(parts); i++ {
- if i >= len(parts)-1 {
- buf.WriteString(parts[i])
- break
- }
- if nextPart := parts[i+1]; 0 < len(nextPart) && lex.IsASCIILetter(nextPart[0]) {
- // 取消已经高亮的部分
- part := strings.ReplaceAll(parts[i], replacementStart, "")
- buf.WriteString(part)
- continue
- }
- buf.WriteString(parts[i])
- buf.WriteString(replacementEnd)
- }
- ret = buf.String()
- return
- }
- var (
- searchIgnoreLastModified int64
- searchIgnore []string
- searchIgnoreLock = sync.Mutex{}
- )
- func getSearchIgnoreLines() (ret []string) {
- // Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
- now := time.Now().UnixMilli()
- if now-searchIgnoreLastModified < 30*1000 {
- return searchIgnore
- }
- searchIgnoreLock.Lock()
- defer searchIgnoreLock.Unlock()
- searchIgnoreLastModified = now
- searchIgnorePath := filepath.Join(util.DataDir, ".siyuan", "searchignore")
- err := os.MkdirAll(filepath.Dir(searchIgnorePath), 0755)
- if nil != err {
- return
- }
- if !gulu.File.IsExist(searchIgnorePath) {
- if err = gulu.File.WriteFileSafer(searchIgnorePath, nil, 0644); nil != err {
- logging.LogErrorf("create searchignore [%s] failed: %s", searchIgnorePath, err)
- return
- }
- }
- data, err := os.ReadFile(searchIgnorePath)
- if nil != err {
- logging.LogErrorf("read searchignore [%s] failed: %s", searchIgnorePath, err)
- return
- }
- dataStr := string(data)
- dataStr = strings.ReplaceAll(dataStr, "\r\n", "\n")
- ret = strings.Split(dataStr, "\n")
- ret = gulu.Str.RemoveDuplicatedElem(ret)
- if 0 < len(ret) && "" == ret[0] {
- ret = ret[1:]
- }
- searchIgnore = nil
- for _, line := range ret {
- searchIgnore = append(searchIgnore, line)
- }
- return
- }
- var (
- refSearchIgnoreLastModified int64
- refSearchIgnore []string
- refSearchIgnoreLock = sync.Mutex{}
- )
- func getRefSearchIgnoreLines() (ret []string) {
- // Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
- now := time.Now().UnixMilli()
- if now-refSearchIgnoreLastModified < 30*1000 {
- return refSearchIgnore
- }
- refSearchIgnoreLock.Lock()
- defer refSearchIgnoreLock.Unlock()
- refSearchIgnoreLastModified = now
- searchIgnorePath := filepath.Join(util.DataDir, ".siyuan", "refsearchignore")
- err := os.MkdirAll(filepath.Dir(searchIgnorePath), 0755)
- if nil != err {
- return
- }
- if !gulu.File.IsExist(searchIgnorePath) {
- if err = gulu.File.WriteFileSafer(searchIgnorePath, nil, 0644); nil != err {
- logging.LogErrorf("create refsearchignore [%s] failed: %s", searchIgnorePath, err)
- return
- }
- }
- data, err := os.ReadFile(searchIgnorePath)
- if nil != err {
- logging.LogErrorf("read refsearchignore [%s] failed: %s", searchIgnorePath, err)
- return
- }
- dataStr := string(data)
- dataStr = strings.ReplaceAll(dataStr, "\r\n", "\n")
- ret = strings.Split(dataStr, "\n")
- ret = gulu.Str.RemoveDuplicatedElem(ret)
- if 0 < len(ret) && "" == ret[0] {
- ret = ret[1:]
- }
- refSearchIgnore = nil
- for _, line := range ret {
- refSearchIgnore = append(refSearchIgnore, line)
- }
- return
- }
- func filterQueryInvisibleChars(query string) string {
- query = strings.ReplaceAll(query, " ", "_@full_width_space@_")
- query = gulu.Str.RemoveInvisible(query)
- query = strings.ReplaceAll(query, "_@full_width_space@_", " ")
- return query
- }
|