siyuan/kernel/model/search.go

1875 lines
60 KiB
Go
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// SiYuan - Refactor your thinking
// Copyright (c) 2020-present, b3log.org
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package model
import (
"bytes"
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/88250/gulu"
"github.com/88250/lute"
"github.com/88250/lute/ast"
"github.com/88250/lute/html"
"github.com/88250/lute/lex"
"github.com/88250/lute/parse"
"github.com/88250/vitess-sqlparser/sqlparser"
"github.com/jinzhu/copier"
"github.com/siyuan-note/filelock"
"github.com/siyuan-note/logging"
"github.com/siyuan-note/siyuan/kernel/conf"
"github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql"
"github.com/siyuan-note/siyuan/kernel/task"
"github.com/siyuan-note/siyuan/kernel/treenode"
"github.com/siyuan-note/siyuan/kernel/util"
"github.com/xrash/smetrics"
)
func ListInvalidBlockRefs(page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) {
refBlockMap := map[string][]string{}
blockMap := map[string]bool{}
var invalidBlockIDs []string
notebooks, err := ListNotebooks()
if err != nil {
return
}
luteEngine := util.NewLute()
for _, notebook := range notebooks {
pages := pagedPaths(filepath.Join(util.DataDir, notebook.ID), 32)
for _, paths := range pages {
var trees []*parse.Tree
for _, localPath := range paths {
tree, loadTreeErr := loadTree(localPath, luteEngine)
if nil != loadTreeErr {
continue
}
trees = append(trees, tree)
}
for _, tree := range trees {
ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
if entering {
if n.IsBlock() {
blockMap[n.ID] = true
return ast.WalkContinue
}
if ast.NodeTextMark == n.Type {
if n.IsTextMarkType("a") {
if strings.HasPrefix(n.TextMarkAHref, "siyuan://blocks/") {
defID := strings.TrimPrefix(n.TextMarkAHref, "siyuan://blocks/")
if strings.Contains(defID, "?") {
defID = strings.Split(defID, "?")[0]
}
refID := treenode.ParentBlock(n).ID
if defIDs := refBlockMap[refID]; 1 > len(defIDs) {
refBlockMap[refID] = []string{defID}
} else {
refBlockMap[refID] = append(defIDs, defID)
}
}
} else if n.IsTextMarkType("block-ref") {
defID := n.TextMarkBlockRefID
refID := treenode.ParentBlock(n).ID
if defIDs := refBlockMap[refID]; 1 > len(defIDs) {
refBlockMap[refID] = []string{defID}
} else {
refBlockMap[refID] = append(defIDs, defID)
}
}
}
}
return ast.WalkContinue
})
}
}
}
invalidDefIDs := map[string]bool{}
for _, refDefIDs := range refBlockMap {
for _, defID := range refDefIDs {
invalidDefIDs[defID] = true
}
}
var toRemoves []string
for defID, _ := range invalidDefIDs {
if _, ok := blockMap[defID]; ok {
toRemoves = append(toRemoves, defID)
}
}
for _, toRemove := range toRemoves {
delete(invalidDefIDs, toRemove)
}
toRemoves = nil
for refID, defIDs := range refBlockMap {
var tmp []string
for _, defID := range defIDs {
if _, ok := invalidDefIDs[defID]; !ok {
tmp = append(tmp, defID)
}
}
for _, toRemove := range tmp {
defIDs = gulu.Str.RemoveElem(defIDs, toRemove)
}
if 1 > len(defIDs) {
toRemoves = append(toRemoves, refID)
}
}
for _, toRemove := range toRemoves {
delete(refBlockMap, toRemove)
}
for refID, _ := range refBlockMap {
invalidBlockIDs = append(invalidBlockIDs, refID)
}
invalidBlockIDs = gulu.Str.RemoveDuplicatedElem(invalidBlockIDs)
sort.Strings(invalidBlockIDs)
allInvalidBlockIDs := invalidBlockIDs
start := (page - 1) * pageSize
end := page * pageSize
if end > len(invalidBlockIDs) {
end = len(invalidBlockIDs)
}
invalidBlockIDs = invalidBlockIDs[start:end]
sqlBlocks := sql.GetBlocks(invalidBlockIDs)
var tmp []*sql.Block
for _, sqlBlock := range sqlBlocks {
if nil != sqlBlock {
tmp = append(tmp, sqlBlock)
}
}
sqlBlocks = tmp
ret = fromSQLBlocks(&sqlBlocks, "", 36)
if 1 > len(ret) {
ret = []*Block{}
}
matchedBlockCount = len(allInvalidBlockIDs)
rootCount := map[string]bool{}
for _, id := range allInvalidBlockIDs {
bt := treenode.GetBlockTree(id)
if nil == bt {
continue
}
rootCount[bt.RootID] = true
}
matchedRootCount = len(rootCount)
pageCount = (matchedBlockCount + pageSize - 1) / pageSize
return
}
type EmbedBlock struct {
Block *Block `json:"block"`
BlockPaths []*BlockPath `json:"blockPaths"`
}
func UpdateEmbedBlock(id, content string) (err error) {
bt := treenode.GetBlockTree(id)
if nil == bt {
err = ErrBlockNotFound
return
}
if treenode.TypeAbbr(ast.NodeBlockQueryEmbed.String()) != bt.Type {
err = errors.New("not query embed block")
return
}
embedBlock := &EmbedBlock{
Block: &Block{
Markdown: content,
},
}
updateEmbedBlockContent(id, []*EmbedBlock{embedBlock})
return
}
func GetEmbedBlock(embedBlockID string, includeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
return getEmbedBlock(embedBlockID, includeIDs, headingMode, breadcrumb)
}
func getEmbedBlock(embedBlockID string, includeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
stmt := "SELECT * FROM `blocks` WHERE `id` IN ('" + strings.Join(includeIDs, "','") + "')"
sqlBlocks := sql.SelectBlocksRawStmtNoParse(stmt, 1024)
// 根据 includeIDs 的顺序排序 Improve `//!js` query embed block result sorting https://github.com/siyuan-note/siyuan/issues/9977
m := map[string]int{}
for i, id := range includeIDs {
m[id] = i
}
sort.Slice(sqlBlocks, func(i, j int) bool {
return m[sqlBlocks[i].ID] < m[sqlBlocks[j].ID]
})
ret = buildEmbedBlock(embedBlockID, []string{}, headingMode, breadcrumb, sqlBlocks)
return
}
func SearchEmbedBlock(embedBlockID, stmt string, excludeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
return searchEmbedBlock(embedBlockID, stmt, excludeIDs, headingMode, breadcrumb)
}
func searchEmbedBlock(embedBlockID, stmt string, excludeIDs []string, headingMode int, breadcrumb bool) (ret []*EmbedBlock) {
sqlBlocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
ret = buildEmbedBlock(embedBlockID, excludeIDs, headingMode, breadcrumb, sqlBlocks)
return
}
func buildEmbedBlock(embedBlockID string, excludeIDs []string, headingMode int, breadcrumb bool, sqlBlocks []*sql.Block) (ret []*EmbedBlock) {
var tmp []*sql.Block
for _, b := range sqlBlocks {
if "query_embed" == b.Type { // 嵌入块不再嵌入
// 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
// 这里会导致上面的 limit 限制不准确,导致结果变少,暂时没有解决方案,只能靠用户自己调整 SQL加上 type != 'query_embed' 的条件
continue
}
if !gulu.Str.Contains(b.ID, excludeIDs) {
tmp = append(tmp, b)
}
}
sqlBlocks = tmp
// 缓存最多 128 棵语法树
trees := map[string]*parse.Tree{}
count := 0
for _, sb := range sqlBlocks {
if nil == trees[sb.RootID] {
tree, _ := LoadTreeByBlockID(sb.RootID)
if nil == tree {
continue
}
trees[sb.RootID] = tree
count++
}
if 127 < count {
break
}
}
for _, sb := range sqlBlocks {
block, blockPaths := getEmbeddedBlock(trees, sb, headingMode, breadcrumb)
if nil == block {
continue
}
ret = append(ret, &EmbedBlock{
Block: block,
BlockPaths: blockPaths,
})
}
// 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
task.AppendTaskWithTimeout(task.DatabaseIndexEmbedBlock, 30*time.Second, updateEmbedBlockContent, embedBlockID, ret)
// 添加笔记本名称
var boxIDs []string
for _, embedBlock := range ret {
boxIDs = append(boxIDs, embedBlock.Block.Box)
}
boxIDs = gulu.Str.RemoveDuplicatedElem(boxIDs)
boxNames := Conf.BoxNames(boxIDs)
for _, embedBlock := range ret {
name := boxNames[embedBlock.Block.Box]
embedBlock.Block.HPath = name + embedBlock.Block.HPath
}
if 1 > len(ret) {
ret = []*EmbedBlock{}
}
return
}
func SearchRefBlock(id, rootID, keyword string, beforeLen int, isSquareBrackets, isDatabase bool) (ret []*Block, newDoc bool) {
cachedTrees := map[string]*parse.Tree{}
onlyDoc := false
if isSquareBrackets {
onlyDoc = Conf.Editor.OnlySearchForDoc
}
if "" == keyword {
// 查询为空时默认的块引排序规则按最近使用优先 https://github.com/siyuan-note/siyuan/issues/3218
typeFilter := Conf.Search.TypeFilter()
ignoreLines := getRefSearchIgnoreLines()
refs := sql.QueryRefsRecent(onlyDoc, typeFilter, ignoreLines)
var btsID []string
for _, ref := range refs {
btsID = append(btsID, ref.DefBlockRootID)
}
btsID = gulu.Str.RemoveDuplicatedElem(btsID)
bts := treenode.GetBlockTrees(btsID)
for _, ref := range refs {
tree := cachedTrees[ref.DefBlockRootID]
if nil == tree {
tree, _ = loadTreeByBlockTree(bts[ref.DefBlockRootID])
}
if nil == tree {
continue
}
cachedTrees[ref.RootID] = tree
node := treenode.GetNodeInTree(tree, ref.DefBlockID)
if nil == node {
continue
}
sqlBlock := sql.BuildBlockFromNode(node, tree)
if nil == sqlBlock {
return
}
block := fromSQLBlock(sqlBlock, "", 0)
block.RefText = getNodeRefText(node)
block.RefText = maxContent(block.RefText, Conf.Editor.BlockRefDynamicAnchorTextMaxLen)
ret = append(ret, block)
}
if 1 > len(ret) {
ret = []*Block{}
}
// 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
prependNotebookNameInHPath(ret)
return
}
ret = fullTextSearchRefBlock(keyword, beforeLen, onlyDoc)
tmp := ret[:0]
var btsID []string
for _, b := range ret {
btsID = append(btsID, b.RootID)
}
btsID = gulu.Str.RemoveDuplicatedElem(btsID)
bts := treenode.GetBlockTrees(btsID)
for _, b := range ret {
tree := cachedTrees[b.RootID]
if nil == tree {
tree, _ = loadTreeByBlockTree(bts[b.RootID])
}
if nil == tree {
continue
}
cachedTrees[b.RootID] = tree
b.RefText = getBlockRefText(b.ID, tree)
hitFirstChildID := false
if b.IsContainerBlock() && "NodeDocument" != b.Type {
// `((` 引用候选中排除当前块的父块 https://github.com/siyuan-note/siyuan/issues/4538
tree := cachedTrees[b.RootID]
if nil == tree {
tree, _ = loadTreeByBlockTree(bts[b.RootID])
cachedTrees[b.RootID] = tree
}
if nil != tree {
bNode := treenode.GetNodeInTree(tree, b.ID)
if fc := treenode.FirstLeafBlock(bNode); nil != fc && fc.ID == id {
hitFirstChildID = true
}
}
}
if "NodeAttributeView" == b.Type {
// 数据库块可以添加到自身数据库块中,当前文档也可以添加到自身数据库块中
tmp = append(tmp, b)
} else {
// 排除自身块、父块和根块
if b.ID != id && !hitFirstChildID && b.ID != rootID {
tmp = append(tmp, b)
}
}
}
ret = tmp
if !isDatabase {
// 如果非数据库中搜索块引,则不允许新建重名文档
if block := treenode.GetBlockTree(id); nil != block {
p := path.Join(block.HPath, keyword)
newDoc = nil == treenode.GetBlockTreeRootByHPath(block.BoxID, p)
}
} else { // 如果是数据库中搜索绑定块,则允许新建重名文档 https://github.com/siyuan-note/siyuan/issues/11713
newDoc = true
}
// 在 hPath 中加入笔记本名 Show notebooks in hpath of block ref search list results https://github.com/siyuan-note/siyuan/issues/9378
prependNotebookNameInHPath(ret)
return
}
func prependNotebookNameInHPath(blocks []*Block) {
var boxIDs []string
for _, b := range blocks {
boxIDs = append(boxIDs, b.Box)
}
boxIDs = gulu.Str.RemoveDuplicatedElem(boxIDs)
boxNames := Conf.BoxNames(boxIDs)
for _, b := range blocks {
name := boxNames[b.Box]
b.HPath = util.EscapeHTML(name) + b.HPath
}
}
func FindReplace(keyword, replacement string, replaceTypes map[string]bool, ids []string, paths, boxes []string, types map[string]bool, method, orderBy, groupBy int) (err error) {
// method0文本1查询语法2SQL3正则表达式
if 1 == method || 2 == method {
err = errors.New(Conf.Language(132))
return
}
if 0 != groupBy {
// 按文档分组后不支持替换 Need to be reminded that replacement operations are not supported after grouping by doc https://github.com/siyuan-note/siyuan/issues/10161
// 因为分组条件传入以后搜索只能命中文档块,会导致 全部替换 失效
err = errors.New(Conf.Language(221))
return
}
// No longer trim spaces for the keyword and replacement https://github.com/siyuan-note/siyuan/issues/9229
if keyword == replacement {
return
}
r, _ := regexp.Compile(keyword)
escapedKey := util.EscapeHTML(keyword)
escapedR, _ := regexp.Compile(escapedKey)
ids = gulu.Str.RemoveDuplicatedElem(ids)
var renameRoots []*ast.Node
renameRootTitles := map[string]string{}
cachedTrees := map[string]*parse.Tree{}
historyDir, err := getHistoryDir(HistoryOpReplace, time.Now())
if err != nil {
logging.LogErrorf("get history dir failed: %s", err)
return
}
if 1 > len(ids) {
// `Replace All` is no longer affected by pagination https://github.com/siyuan-note/siyuan/issues/8265
blocks, _, _, _ := FullTextSearchBlock(keyword, boxes, paths, types, method, orderBy, groupBy, 1, math.MaxInt)
for _, block := range blocks {
ids = append(ids, block.ID)
}
}
for _, id := range ids {
bt := treenode.GetBlockTree(id)
if nil == bt {
continue
}
tree := cachedTrees[bt.RootID]
if nil != tree {
continue
}
tree, _ = LoadTreeByBlockID(id)
if nil == tree {
continue
}
historyPath := filepath.Join(historyDir, tree.Box, tree.Path)
if err = os.MkdirAll(filepath.Dir(historyPath), 0755); err != nil {
logging.LogErrorf("generate history failed: %s", err)
return
}
var data []byte
if data, err = filelock.ReadFile(filepath.Join(util.DataDir, tree.Box, tree.Path)); err != nil {
logging.LogErrorf("generate history failed: %s", err)
return
}
if err = gulu.File.WriteFileSafer(historyPath, data, 0644); err != nil {
logging.LogErrorf("generate history failed: %s", err)
return
}
cachedTrees[bt.RootID] = tree
}
indexHistoryDir(filepath.Base(historyDir), util.NewLute())
luteEngine := util.NewLute()
var reloadTreeIDs []string
for i, id := range ids {
bt := treenode.GetBlockTree(id)
if nil == bt {
continue
}
tree := cachedTrees[bt.RootID]
if nil == tree {
continue
}
node := treenode.GetNodeInTree(tree, id)
if nil == node {
continue
}
reloadTreeIDs = append(reloadTreeIDs, tree.ID)
if ast.NodeDocument == node.Type {
if !replaceTypes["docTitle"] {
continue
}
title := node.IALAttr("title")
if 0 == method {
if strings.Contains(title, keyword) {
docTitleReplacement := strings.ReplaceAll(replacement, "/", "")
renameRootTitles[node.ID] = strings.ReplaceAll(title, keyword, docTitleReplacement)
renameRoots = append(renameRoots, node)
}
} else if 3 == method {
if nil != r && r.MatchString(title) {
docTitleReplacement := strings.ReplaceAll(replacement, "/", "")
renameRootTitles[node.ID] = r.ReplaceAllString(title, docTitleReplacement)
renameRoots = append(renameRoots, node)
}
}
} else {
var unlinks []*ast.Node
ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus {
if !entering {
return ast.WalkContinue
}
switch n.Type {
case ast.NodeText:
if !replaceTypes["text"] {
return ast.WalkContinue
}
if replaceTextNode(n, method, keyword, replacement, r, luteEngine) {
unlinks = append(unlinks, n)
}
case ast.NodeLinkDest:
if !replaceTypes["imgSrc"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeLinkText:
if !replaceTypes["imgText"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeLinkTitle:
if !replaceTypes["imgTitle"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeCodeBlockCode:
if !replaceTypes["codeBlock"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeMathBlockContent:
if !replaceTypes["mathBlock"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeHTMLBlock:
if !replaceTypes["htmlBlock"] {
return ast.WalkContinue
}
replaceNodeTokens(n, method, keyword, replacement, r)
case ast.NodeTextMark:
if n.IsTextMarkType("code") {
if !replaceTypes["code"] {
return ast.WalkContinue
}
if 0 == method {
if strings.Contains(n.TextMarkTextContent, escapedKey) {
n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, escapedKey, replacement)
}
} else if 3 == method {
if nil != escapedR && escapedR.MatchString(n.TextMarkTextContent) {
n.TextMarkTextContent = escapedR.ReplaceAllString(n.TextMarkTextContent, replacement)
}
}
} else if n.IsTextMarkType("a") {
if replaceTypes["aText"] {
if 0 == method {
if strings.Contains(n.TextMarkTextContent, keyword) {
n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkTextContent) {
n.TextMarkTextContent = r.ReplaceAllString(n.TextMarkTextContent, replacement)
}
}
}
if replaceTypes["aTitle"] {
if 0 == method {
if strings.Contains(n.TextMarkATitle, keyword) {
n.TextMarkATitle = strings.ReplaceAll(n.TextMarkATitle, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkATitle) {
n.TextMarkATitle = r.ReplaceAllString(n.TextMarkATitle, replacement)
}
}
}
if replaceTypes["aHref"] {
if 0 == method {
if strings.Contains(n.TextMarkAHref, keyword) {
n.TextMarkAHref = strings.ReplaceAll(n.TextMarkAHref, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkAHref) {
n.TextMarkAHref = r.ReplaceAllString(n.TextMarkAHref, replacement)
}
}
}
} else if n.IsTextMarkType("em") {
if !replaceTypes["em"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "em")
} else if n.IsTextMarkType("strong") {
if !replaceTypes["strong"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "strong")
} else if n.IsTextMarkType("kbd") {
if !replaceTypes["kbd"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "kbd")
} else if n.IsTextMarkType("mark") {
if !replaceTypes["mark"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "mark")
} else if n.IsTextMarkType("s") {
if !replaceTypes["s"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "s")
} else if n.IsTextMarkType("sub") {
if !replaceTypes["sub"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "sub")
} else if n.IsTextMarkType("sup") {
if !replaceTypes["sup"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "sup")
} else if n.IsTextMarkType("tag") {
if !replaceTypes["tag"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "tag")
} else if n.IsTextMarkType("u") {
if !replaceTypes["u"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "u")
} else if n.IsTextMarkType("inline-math") {
if !replaceTypes["inlineMath"] {
return ast.WalkContinue
}
if 0 == method {
if strings.Contains(n.TextMarkInlineMathContent, keyword) {
n.TextMarkInlineMathContent = strings.ReplaceAll(n.TextMarkInlineMathContent, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkInlineMathContent) {
n.TextMarkInlineMathContent = r.ReplaceAllString(n.TextMarkInlineMathContent, replacement)
}
}
} else if n.IsTextMarkType("inline-memo") {
if !replaceTypes["inlineMemo"] {
return ast.WalkContinue
}
if 0 == method {
if strings.Contains(n.TextMarkInlineMemoContent, keyword) {
n.TextMarkInlineMemoContent = strings.ReplaceAll(n.TextMarkInlineMemoContent, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkInlineMemoContent) {
n.TextMarkInlineMemoContent = r.ReplaceAllString(n.TextMarkInlineMemoContent, replacement)
}
}
} else if n.IsTextMarkType("text") {
// Search and replace fails in some cases https://github.com/siyuan-note/siyuan/issues/10016
if !replaceTypes["text"] {
return ast.WalkContinue
}
replaceNodeTextMarkTextContent(n, method, keyword, replacement, r, "text")
} else if n.IsTextMarkType("block-ref") {
if !replaceTypes["blockRef"] {
return ast.WalkContinue
}
if 0 == method {
if strings.Contains(n.TextMarkTextContent, keyword) {
n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, keyword, replacement)
n.TextMarkBlockRefSubtype = "s"
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkTextContent) {
n.TextMarkTextContent = r.ReplaceAllString(n.TextMarkTextContent, replacement)
n.TextMarkBlockRefSubtype = "s"
}
}
}
}
return ast.WalkContinue
})
for _, unlink := range unlinks {
unlink.Unlink()
}
if err = writeTreeUpsertQueue(tree); err != nil {
return
}
}
util.PushEndlessProgress(fmt.Sprintf(Conf.Language(206), i+1, len(ids)))
}
for i, renameRoot := range renameRoots {
newTitle := renameRootTitles[renameRoot.ID]
RenameDoc(renameRoot.Box, renameRoot.Path, newTitle)
util.PushEndlessProgress(fmt.Sprintf(Conf.Language(207), i+1, len(renameRoots)))
}
sql.FlushQueue()
reloadTreeIDs = gulu.Str.RemoveDuplicatedElem(reloadTreeIDs)
for _, id := range reloadTreeIDs {
refreshProtyle(id)
}
sql.FlushQueue()
util.PushClearProgress()
return
}
func replaceNodeTextMarkTextContent(n *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp, typ string) {
if 0 == method {
if "tag" == typ {
keyword = strings.TrimPrefix(keyword, "#")
keyword = strings.TrimSuffix(keyword, "#")
}
if strings.Contains(n.TextMarkTextContent, keyword) {
n.TextMarkTextContent = strings.ReplaceAll(n.TextMarkTextContent, keyword, replacement)
}
} else if 3 == method {
if nil != r && r.MatchString(n.TextMarkTextContent) {
n.TextMarkTextContent = r.ReplaceAllString(n.TextMarkTextContent, replacement)
}
}
}
// replaceTextNode 替换文本节点为其他节点。
// Supports replacing text elements with other elements https://github.com/siyuan-note/siyuan/issues/11058
func replaceTextNode(text *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp, luteEngine *lute.Lute) bool {
if 0 == method {
newContent := text.Tokens
if Conf.Search.CaseSensitive {
if bytes.Contains(text.Tokens, []byte(keyword)) {
newContent = bytes.ReplaceAll(text.Tokens, []byte(keyword), []byte(replacement))
}
} else {
// 当搜索结果中的文本元素包含大小写混合时替换失败
// Replace fails when search results contain mixed case in text elements https://github.com/siyuan-note/siyuan/issues/9171
keywords := strings.Split(keyword, " ")
// keyword 可能是 "foo Foo" 使用空格分隔的大小写命中情况,这里统一转换小写后去重
if 1 < len(keywords) {
var lowerKeywords []string
for _, k := range keywords {
lowerKeywords = append(lowerKeywords, strings.ToLower(k))
}
lowerKeywords = gulu.Str.RemoveDuplicatedElem(lowerKeywords)
keyword = strings.Join(lowerKeywords, " ")
}
if bytes.Contains(bytes.ToLower(text.Tokens), []byte(keyword)) {
newContent = replaceCaseInsensitive(text.Tokens, []byte(keyword), []byte(replacement))
}
}
if !bytes.Equal(newContent, text.Tokens) {
tree := parse.Inline("", newContent, luteEngine.ParseOptions)
if nil == tree.Root.FirstChild {
return false
}
parse.NestedInlines2FlattedSpans(tree, false)
var replaceNodes []*ast.Node
for rNode := tree.Root.FirstChild.FirstChild; nil != rNode; rNode = rNode.Next {
replaceNodes = append(replaceNodes, rNode)
}
for _, rNode := range replaceNodes {
text.InsertBefore(rNode)
}
return true
}
} else if 3 == method {
if nil != r && r.MatchString(string(text.Tokens)) {
newContent := []byte(r.ReplaceAllString(string(text.Tokens), replacement))
tree := parse.Inline("", newContent, luteEngine.ParseOptions)
if nil == tree.Root.FirstChild {
return false
}
var replaceNodes []*ast.Node
for rNode := tree.Root.FirstChild.FirstChild; nil != rNode; rNode = rNode.Next {
replaceNodes = append(replaceNodes, rNode)
}
for _, rNode := range replaceNodes {
text.InsertBefore(rNode)
}
return true
}
}
return false
}
func replaceNodeTokens(n *ast.Node, method int, keyword string, replacement string, r *regexp.Regexp) {
if 0 == method {
if bytes.Contains(n.Tokens, []byte(keyword)) {
n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(keyword), []byte(replacement))
}
} else if 3 == method {
if nil != r && r.MatchString(string(n.Tokens)) {
n.Tokens = []byte(r.ReplaceAllString(string(n.Tokens), replacement))
}
}
}
// FullTextSearchBlock 搜索内容块。
//
// method0关键字1查询语法2SQL3正则表达式
// orderBy: 0按块类型默认1按创建时间升序2按创建时间降序3按更新时间升序4按更新时间降序5按内容顺序仅在按文档分组时6按相关度升序7按相关度降序
// groupBy0不分组1按文档分组
func FullTextSearchBlock(query string, boxes, paths []string, types map[string]bool, method, orderBy, groupBy, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount, pageCount int) {
ret = []*Block{}
if "" == query {
return
}
trimQuery := strings.TrimSpace(query)
if "" != trimQuery {
query = trimQuery
}
beforeLen := 36
var blocks []*Block
orderByClause := buildOrderBy(query, method, orderBy)
switch method {
case 1: // 查询语法
filter := buildTypeFilter(types)
boxFilter := buildBoxesFilter(boxes)
pathFilter := buildPathsFilter(paths)
blocks, matchedBlockCount, matchedRootCount = fullTextSearchByQuerySyntax(query, boxFilter, pathFilter, filter, orderByClause, beforeLen, page, pageSize)
case 2: // SQL
blocks, matchedBlockCount, matchedRootCount = searchBySQL(query, beforeLen, page, pageSize)
case 3: // 正则表达式
typeFilter := buildTypeFilter(types)
boxFilter := buildBoxesFilter(boxes)
pathFilter := buildPathsFilter(paths)
blocks, matchedBlockCount, matchedRootCount = fullTextSearchByRegexp(query, boxFilter, pathFilter, typeFilter, orderByClause, beforeLen, page, pageSize)
default: // 关键字
filter := buildTypeFilter(types)
boxFilter := buildBoxesFilter(boxes)
pathFilter := buildPathsFilter(paths)
blocks, matchedBlockCount, matchedRootCount = fullTextSearchByKeyword(query, boxFilter, pathFilter, filter, orderByClause, beforeLen, page, pageSize)
}
pageCount = (matchedBlockCount + pageSize - 1) / pageSize
switch groupBy {
case 0: // 不分组
ret = blocks
case 1: // 按文档分组
rootMap := map[string]bool{}
var rootIDs []string
contentSorts := map[string]int{}
var btsID []string
for _, b := range blocks {
btsID = append(btsID, b.RootID)
}
btsID = gulu.Str.RemoveDuplicatedElem(btsID)
bts := treenode.GetBlockTrees(btsID)
for _, b := range blocks {
if _, ok := rootMap[b.RootID]; !ok {
rootMap[b.RootID] = true
rootIDs = append(rootIDs, b.RootID)
tree, _ := loadTreeByBlockTree(bts[b.RootID])
if nil == tree {
continue
}
if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
sort := 0
ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
if !entering || !n.IsBlock() {
return ast.WalkContinue
}
contentSorts[n.ID] = sort
sort++
return ast.WalkContinue
})
}
}
}
sqlRoots := sql.GetBlocks(rootIDs)
roots := fromSQLBlocks(&sqlRoots, "", beforeLen)
for _, root := range roots {
for _, b := range blocks {
if 5 == orderBy { // 按内容顺序(仅在按文档分组时)
b.Sort = contentSorts[b.ID]
}
if b.RootID == root.ID {
root.Children = append(root.Children, b)
}
}
switch orderBy {
case 1: //按创建时间升序
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Created < root.Children[j].Created })
case 2: // 按创建时间降序
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Created > root.Children[j].Created })
case 3: // 按更新时间升序
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Updated < root.Children[j].Updated })
case 4: // 按更新时间降序
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Updated > root.Children[j].Updated })
case 5: // 按内容顺序(仅在按文档分组时)
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Sort < root.Children[j].Sort })
default: // 按块类型(默认)
sort.Slice(root.Children, func(i, j int) bool { return root.Children[i].Sort < root.Children[j].Sort })
}
}
switch orderBy {
case 1: //按创建时间升序
sort.Slice(roots, func(i, j int) bool { return roots[i].Created < roots[j].Created })
case 2: // 按创建时间降序
sort.Slice(roots, func(i, j int) bool { return roots[i].Created > roots[j].Created })
case 3: // 按更新时间升序
sort.Slice(roots, func(i, j int) bool { return roots[i].Updated < roots[j].Updated })
case 4: // 按更新时间降序
sort.Slice(roots, func(i, j int) bool { return roots[i].Updated > roots[j].Updated })
case 5: // 按内容顺序(仅在按文档分组时)
// 都是文档,不需要再次排序
case 6, 7: // 按相关度
// 已在 ORDER BY 中处理
default: // 按块类型(默认)
// 都是文档,不需要再次排序
}
ret = roots
default:
ret = blocks
}
if 1 > len(ret) {
ret = []*Block{}
}
return
}
func buildBoxesFilter(boxes []string) string {
if 0 == len(boxes) {
return ""
}
builder := bytes.Buffer{}
builder.WriteString(" AND (")
for i, box := range boxes {
builder.WriteString(fmt.Sprintf("box = '%s'", box))
if i < len(boxes)-1 {
builder.WriteString(" OR ")
}
}
builder.WriteString(")")
return builder.String()
}
func buildPathsFilter(paths []string) string {
if 0 == len(paths) {
return ""
}
builder := bytes.Buffer{}
builder.WriteString(" AND (")
for i, path := range paths {
builder.WriteString(fmt.Sprintf("path LIKE '%s%%'", path))
if i < len(paths)-1 {
builder.WriteString(" OR ")
}
}
builder.WriteString(")")
return builder.String()
}
func buildOrderBy(query string, method, orderBy int) string {
switch orderBy {
case 1:
return "ORDER BY created ASC"
case 2:
return "ORDER BY created DESC"
case 3:
return "ORDER BY updated ASC"
case 4:
return "ORDER BY updated DESC"
case 6:
if 0 != method && 1 != method {
// 只有关键字搜索和查询语法搜索才支持按相关度升序 https://github.com/siyuan-note/siyuan/issues/7861
return "ORDER BY sort DESC, updated DESC"
}
return "ORDER BY rank DESC" // 默认是按相关度降序,所以按相关度升序要反过来使用 DESC
case 7:
if 0 != method && 1 != method {
return "ORDER BY sort ASC, updated DESC"
}
return "ORDER BY rank" // 默认是按相关度降序
default:
clause := "ORDER BY CASE " +
"WHEN name = '${keyword}' THEN 10 " +
"WHEN alias = '${keyword}' THEN 20 " +
"WHEN name LIKE '%${keyword}%' THEN 50 " +
"WHEN alias LIKE '%${keyword}%' THEN 60 " +
"ELSE 65535 END ASC, sort ASC, updated DESC"
clause = strings.ReplaceAll(clause, "${keyword}", strings.ReplaceAll(query, "'", "''"))
return clause
}
}
func buildTypeFilter(types map[string]bool) string {
s := conf.NewSearch()
if err := copier.Copy(s, Conf.Search); err != nil {
logging.LogErrorf("copy search conf failed: %s", err)
}
if nil != types {
s.Document = types["document"]
s.Heading = types["heading"]
s.List = types["list"]
s.ListItem = types["listItem"]
s.CodeBlock = types["codeBlock"]
s.MathBlock = types["mathBlock"]
s.Table = types["table"]
s.Blockquote = types["blockquote"]
s.SuperBlock = types["superBlock"]
s.Paragraph = types["paragraph"]
s.HTMLBlock = types["htmlBlock"]
s.EmbedBlock = types["embedBlock"]
s.DatabaseBlock = types["databaseBlock"]
s.AudioBlock = types["audioBlock"]
s.VideoBlock = types["videoBlock"]
s.IFrameBlock = types["iframeBlock"]
s.WidgetBlock = types["widgetBlock"]
} else {
s.Document = Conf.Search.Document
s.Heading = Conf.Search.Heading
s.List = Conf.Search.List
s.ListItem = Conf.Search.ListItem
s.CodeBlock = Conf.Search.CodeBlock
s.MathBlock = Conf.Search.MathBlock
s.Table = Conf.Search.Table
s.Blockquote = Conf.Search.Blockquote
s.SuperBlock = Conf.Search.SuperBlock
s.Paragraph = Conf.Search.Paragraph
s.HTMLBlock = Conf.Search.HTMLBlock
s.EmbedBlock = Conf.Search.EmbedBlock
s.DatabaseBlock = Conf.Search.DatabaseBlock
s.AudioBlock = Conf.Search.AudioBlock
s.VideoBlock = Conf.Search.VideoBlock
s.IFrameBlock = Conf.Search.IFrameBlock
s.WidgetBlock = Conf.Search.WidgetBlock
}
return s.TypeFilter()
}
func searchBySQL(stmt string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
stmt = filterQueryInvisibleChars(stmt)
stmt = strings.TrimSpace(stmt)
blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
ret = fromSQLBlocks(&blocks, "", beforeLen)
if 1 > len(ret) {
ret = []*Block{}
return
}
stmt = strings.ToLower(stmt)
if strings.HasPrefix(stmt, "select a.* ") { // 多个搜索关键字匹配文档 https://github.com/siyuan-note/siyuan/issues/7350
stmt = strings.ReplaceAll(stmt, "select a.* ", "select COUNT(a.id) AS `matches`, COUNT(DISTINCT(a.root_id)) AS `docs` ")
} else {
stmt = strings.ReplaceAll(stmt, "select * ", "select COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` ")
}
stmt = removeLimitClause(stmt)
result, _ := sql.QueryNoLimit(stmt)
if 1 > len(ret) {
return
}
matchedBlockCount = int(result[0]["matches"].(int64))
matchedRootCount = int(result[0]["docs"].(int64))
return
}
func removeLimitClause(stmt string) string {
parsedStmt, err := sqlparser.Parse(stmt)
if err != nil {
return stmt
}
switch parsedStmt.(type) {
case *sqlparser.Select:
slct := parsedStmt.(*sqlparser.Select)
if nil != slct.Limit {
slct.Limit = nil
}
stmt = sqlparser.String(slct)
}
return stmt
}
func fullTextSearchRefBlock(keyword string, beforeLen int, onlyDoc bool) (ret []*Block) {
keyword = filterQueryInvisibleChars(keyword)
if id := extractID(keyword); "" != id {
ret, _, _ = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+id+"'", 36, 1, 32)
return
}
quotedKeyword := stringQuery(keyword)
table := "blocks_fts" // 大小写敏感
if !Conf.Search.CaseSensitive {
table = "blocks_fts_case_insensitive"
}
projections := "id, parent_id, root_id, hash, box, path, " +
"snippet(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS hpath, " +
"snippet(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS name, " +
"snippet(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS alias, " +
"snippet(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS memo, " +
"snippet(" + table + ", 10, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS tag, " +
"snippet(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS content, " +
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
stmt := "SELECT " + projections + " FROM " + table + " WHERE " + table + " MATCH '" + columnFilter() + ":(" + quotedKeyword + ")' AND type"
if onlyDoc {
stmt += " = 'd'"
} else {
stmt += " IN " + Conf.Search.TypeFilter()
}
if ignoreLines := getRefSearchIgnoreLines(); 0 < len(ignoreLines) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
buf := bytes.Buffer{}
for _, line := range ignoreLines {
buf.WriteString(" AND ")
buf.WriteString(line)
}
stmt += buf.String()
}
orderBy := ` ORDER BY CASE
WHEN name = '${keyword}' THEN 10
WHEN alias = '${keyword}' THEN 20
WHEN memo = '${keyword}' THEN 30
WHEN content = '${keyword}' and type = 'd' THEN 40
WHEN content LIKE '%${keyword}%' and type = 'd' THEN 41
WHEN name LIKE '%${keyword}%' THEN 50
WHEN alias LIKE '%${keyword}%' THEN 60
WHEN content = '${keyword}' and type = 'h' THEN 70
WHEN content LIKE '%${keyword}%' and type = 'h' THEN 71
WHEN fcontent = '${keyword}' and type = 'i' THEN 80
WHEN fcontent LIKE '%${keyword}%' and type = 'i' THEN 81
WHEN memo LIKE '%${keyword}%' THEN 90
WHEN content LIKE '%${keyword}%' and type != 'i' and type != 'l' THEN 100
ELSE 65535 END ASC, sort ASC, length ASC`
orderBy = strings.ReplaceAll(orderBy, "${keyword}", strings.ReplaceAll(keyword, "'", "''"))
stmt += orderBy + " LIMIT " + strconv.Itoa(Conf.Search.Limit)
blocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
ret = fromSQLBlocks(&blocks, "", beforeLen)
if 1 > len(ret) {
ret = []*Block{}
}
return
}
func extractID(content string) (ret string) {
// Improve block ref search ID extraction https://github.com/siyuan-note/siyuan/issues/10848
if 22 > len(content) {
return
}
// 从第一个字符开始循环,直到找到一个合法的 ID 为止
for i := 0; i < len(content)-21; i++ {
if ast.IsNodeIDPattern(content[i : i+22]) {
ret = content[i : i+22]
return
}
}
return
}
func fullTextSearchByQuerySyntax(query, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
query = filterQueryInvisibleChars(query)
if ast.IsNodeIDPattern(query) {
ret, matchedBlockCount, matchedRootCount = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+query+"'", beforeLen, page, pageSize)
return
}
return fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy, beforeLen, page, pageSize)
}
func fullTextSearchByKeyword(query, boxFilter, pathFilter, typeFilter string, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
query = filterQueryInvisibleChars(query)
if ast.IsNodeIDPattern(query) {
ret, matchedBlockCount, matchedRootCount = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+query+"'", beforeLen, page, pageSize)
return
}
query = stringQuery(query)
return fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy, beforeLen, page, pageSize)
}
func fullTextSearchByRegexp(exp, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
exp = filterQueryInvisibleChars(exp)
fieldFilter := fieldRegexp(exp)
stmt := "SELECT * FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
stmt += boxFilter + pathFilter
stmt += " " + orderBy
regex := regexp.MustCompile(exp)
blocks := sql.SelectBlocksRegex(stmt, regex, Conf.Search.Name, Conf.Search.Alias, Conf.Search.Memo, Conf.Search.IAL, page, pageSize)
ret = fromSQLBlocks(&blocks, "", beforeLen)
if 1 > len(ret) {
ret = []*Block{}
}
matchedBlockCount, matchedRootCount = fullTextSearchCountByRegexp(exp, boxFilter, pathFilter, typeFilter)
return
}
func fullTextSearchCountByRegexp(exp, boxFilter, pathFilter, typeFilter string) (matchedBlockCount, matchedRootCount int) {
fieldFilter := fieldRegexp(exp)
stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
stmt += boxFilter + pathFilter
result, _ := sql.QueryNoLimit(stmt)
if 1 > len(result) {
return
}
matchedBlockCount = int(result[0]["matches"].(int64))
matchedRootCount = int(result[0]["docs"].(int64))
return
}
func fullTextSearchByFTS(query, boxFilter, pathFilter, typeFilter, orderBy string, beforeLen, page, pageSize int) (ret []*Block, matchedBlockCount, matchedRootCount int) {
table := "blocks_fts" // 大小写敏感
if !Conf.Search.CaseSensitive {
table = "blocks_fts_case_insensitive"
}
projections := "id, parent_id, root_id, hash, box, path, " +
// Search result content snippet returns more text https://github.com/siyuan-note/siyuan/issues/10707
"snippet(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS hpath, " +
"snippet(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS name, " +
"snippet(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS alias, " +
"snippet(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS memo, " +
"snippet(" + table + ", 10, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 64) AS tag, " +
"snippet(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "', '...', 512) AS content, " +
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
stmt += ") AND type IN " + typeFilter
stmt += boxFilter + pathFilter
if ignoreLines := getSearchIgnoreLines(); 0 < len(ignoreLines) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
buf := bytes.Buffer{}
for _, line := range ignoreLines {
buf.WriteString(" AND ")
buf.WriteString(line)
}
stmt += buf.String()
}
stmt += " " + orderBy
stmt += " LIMIT " + strconv.Itoa(pageSize) + " OFFSET " + strconv.Itoa((page-1)*pageSize)
blocks := sql.SelectBlocksRawStmt(stmt, page, pageSize)
ret = fromSQLBlocks(&blocks, "", beforeLen)
if 1 > len(ret) {
ret = []*Block{}
}
matchedBlockCount, matchedRootCount = fullTextSearchCount(query, boxFilter, pathFilter, typeFilter)
return
}
func highlightByFTS(query, typeFilter, id string) (ret []string) {
const limit = 256
table := "blocks_fts"
if !Conf.Search.CaseSensitive {
table = "blocks_fts_case_insensitive"
}
projections := "id, parent_id, root_id, hash, box, path, " +
"highlight(" + table + ", 6, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS hpath, " +
"highlight(" + table + ", 7, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS name, " +
"highlight(" + table + ", 8, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS alias, " +
"highlight(" + table + ", 9, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS memo, " +
"highlight(" + table + ", 10, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS tag, " +
"highlight(" + table + ", 11, '" + search.SearchMarkLeft + "', '" + search.SearchMarkRight + "') AS content, " +
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
stmt := "SELECT " + projections + " FROM " + table + " WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
stmt += ") AND type IN " + typeFilter
stmt += " AND root_id = '" + id + "'"
stmt += " LIMIT " + strconv.Itoa(limit)
sqlBlocks := sql.SelectBlocksRawStmt(stmt, 1, limit)
for _, block := range sqlBlocks {
keyword := gulu.Str.SubstringsBetween(block.Content, search.SearchMarkLeft, search.SearchMarkRight)
if 0 < len(keyword) {
ret = append(ret, keyword...)
}
}
ret = gulu.Str.RemoveDuplicatedElem(ret)
return
}
func highlightByRegexp(query, typeFilter, id string) (ret []string) {
fieldFilter := fieldRegexp(query)
stmt := "SELECT * FROM `blocks` WHERE " + fieldFilter + " AND type IN " + typeFilter
stmt += " AND root_id = '" + id + "'"
regex := regexp.MustCompile(query)
sqlBlocks := sql.SelectBlocksRegex(stmt, regex, Conf.Search.Name, Conf.Search.Alias, Conf.Search.Memo, Conf.Search.IAL, 1, 256)
for _, block := range sqlBlocks {
keyword := gulu.Str.SubstringsBetween(block.Content, search.SearchMarkLeft, search.SearchMarkRight)
if 0 < len(keyword) {
ret = append(ret, keyword...)
}
}
ret = gulu.Str.RemoveDuplicatedElem(ret)
return
}
func fullTextSearchCount(query, boxFilter, pathFilter, typeFilter string) (matchedBlockCount, matchedRootCount int) {
query = filterQueryInvisibleChars(query)
if ast.IsNodeIDPattern(query) {
ret, _ := sql.QueryNoLimit("SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `blocks` WHERE `id` = '" + query + "'")
if 1 > len(ret) {
return
}
matchedBlockCount = int(ret[0]["matches"].(int64))
matchedRootCount = int(ret[0]["docs"].(int64))
return
}
table := "blocks_fts" // 大小写敏感
if !Conf.Search.CaseSensitive {
table = "blocks_fts_case_insensitive"
}
stmt := "SELECT COUNT(id) AS `matches`, COUNT(DISTINCT(root_id)) AS `docs` FROM `" + table + "` WHERE (`" + table + "` MATCH '" + columnFilter() + ":(" + query + ")'"
stmt += ") AND type IN " + typeFilter
stmt += boxFilter + pathFilter
if ignoreLines := getSearchIgnoreLines(); 0 < len(ignoreLines) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
buf := bytes.Buffer{}
for _, line := range ignoreLines {
buf.WriteString(" AND ")
buf.WriteString(line)
}
stmt += buf.String()
}
result, _ := sql.QueryNoLimit(stmt)
if 1 > len(result) {
return
}
matchedBlockCount = int(result[0]["matches"].(int64))
matchedRootCount = int(result[0]["docs"].(int64))
return
}
func markSearch(text string, keyword string, beforeLen int) (marked string, score float64) {
if 0 == len(keyword) {
marked = text
if strings.Contains(marked, search.SearchMarkLeft) { // 使用 FTS snippet() 处理过高亮片段,这里简单替换后就返回
marked = util.EscapeHTML(text)
marked = strings.ReplaceAll(marked, search.SearchMarkLeft, "<mark>")
marked = strings.ReplaceAll(marked, search.SearchMarkRight, "</mark>")
return
}
keywords := gulu.Str.SubstringsBetween(marked, search.SearchMarkLeft, search.SearchMarkRight)
keywords = gulu.Str.RemoveDuplicatedElem(keywords)
keyword = strings.Join(keywords, search.TermSep)
marked = strings.ReplaceAll(marked, search.SearchMarkLeft, "")
marked = strings.ReplaceAll(marked, search.SearchMarkRight, "")
_, marked = search.MarkText(marked, keyword, beforeLen, Conf.Search.CaseSensitive)
return
}
pos, marked := search.MarkText(text, keyword, beforeLen, Conf.Search.CaseSensitive)
if -1 < pos {
if 0 == pos {
score = 1
}
score += float64(strings.Count(marked, "<mark>"))
winkler := smetrics.JaroWinkler(text, keyword, 0.7, 4)
score += winkler
}
score = -score // 分越小排序越靠前
return
}
func fromSQLBlocks(sqlBlocks *[]*sql.Block, terms string, beforeLen int) (ret []*Block) {
for _, sqlBlock := range *sqlBlocks {
ret = append(ret, fromSQLBlock(sqlBlock, terms, beforeLen))
}
return
}
func fromSQLBlock(sqlBlock *sql.Block, terms string, beforeLen int) (block *Block) {
if nil == sqlBlock {
return
}
id := sqlBlock.ID
content := sqlBlock.Content
if 1 < strings.Count(content, search.SearchMarkRight) && strings.HasSuffix(content, search.SearchMarkRight+"...") {
// 返回多个关键字命中时需要检查最后一个关键字是否被截断
firstKeyword := gulu.Str.SubStringBetween(content, search.SearchMarkLeft, search.SearchMarkRight)
lastKeyword := gulu.Str.LastSubStringBetween(content, search.SearchMarkLeft, search.SearchMarkRight)
if firstKeyword != lastKeyword {
// 如果第一个关键字和最后一个关键字不相同,说明最后一个关键字被截断了
// 此时需要将 content 中的最后一个关键字替换为完整的关键字
content = strings.TrimSuffix(content, search.SearchMarkLeft+lastKeyword+search.SearchMarkRight+"...")
content += search.SearchMarkLeft + firstKeyword + search.SearchMarkRight + "..."
}
}
content = util.EscapeHTML(content) // Search dialog XSS https://github.com/siyuan-note/siyuan/issues/8525
content, _ = markSearch(content, terms, beforeLen)
content = maxContent(content, 5120)
tag, _ := markSearch(sqlBlock.Tag, terms, beforeLen)
markdown := maxContent(sqlBlock.Markdown, 5120)
fContent := util.EscapeHTML(sqlBlock.FContent) // fContent 会用于和 content 对比,在反链计算时用于判断是否是列表项下第一个子块,所以也需要转义 https://github.com/siyuan-note/siyuan/issues/11001
block = &Block{
Box: sqlBlock.Box,
Path: sqlBlock.Path,
ID: id,
RootID: sqlBlock.RootID,
ParentID: sqlBlock.ParentID,
Alias: sqlBlock.Alias,
Name: sqlBlock.Name,
Memo: sqlBlock.Memo,
Tag: tag,
Content: content,
FContent: fContent,
Markdown: markdown,
Type: treenode.FromAbbrType(sqlBlock.Type),
SubType: sqlBlock.SubType,
Sort: sqlBlock.Sort,
}
if "" != sqlBlock.IAL {
block.IAL = map[string]string{}
ialStr := strings.TrimPrefix(sqlBlock.IAL, "{:")
ialStr = strings.TrimSuffix(ialStr, "}")
ial := parse.Tokens2IAL([]byte(ialStr))
for _, kv := range ial {
block.IAL[kv[0]] = kv[1]
}
}
hPath, _ := markSearch(sqlBlock.HPath, terms, 18)
if !strings.HasPrefix(hPath, "/") {
hPath = "/" + hPath
}
block.HPath = hPath
if "" != block.Name {
block.Name, _ = markSearch(block.Name, terms, 256)
}
if "" != block.Alias {
block.Alias, _ = markSearch(block.Alias, terms, 256)
}
if "" != block.Memo {
block.Memo, _ = markSearch(block.Memo, terms, 256)
}
return
}
func maxContent(content string, maxLen int) string {
idx := strings.Index(content, "<mark>")
if 128 < maxLen && maxLen <= idx {
head := bytes.Buffer{}
for i := 0; i < 512; i++ {
r, size := utf8.DecodeLastRuneInString(content[:idx])
head.WriteRune(r)
idx -= size
if 64 < head.Len() {
break
}
}
content = util.Reverse(head.String()) + content[idx:]
}
if maxLen < utf8.RuneCountInString(content) {
return gulu.Str.SubStr(content, maxLen) + "..."
}
return content
}
func fieldRegexp(regexp string) string {
buf := bytes.Buffer{}
buf.WriteString("(")
buf.WriteString("content REGEXP '")
buf.WriteString(regexp)
buf.WriteString("'")
if Conf.Search.Name {
buf.WriteString(" OR name REGEXP '")
buf.WriteString(regexp)
buf.WriteString("'")
}
if Conf.Search.Alias {
buf.WriteString(" OR alias REGEXP '")
buf.WriteString(regexp)
buf.WriteString("'")
}
if Conf.Search.Memo {
buf.WriteString(" OR memo REGEXP '")
buf.WriteString(regexp)
buf.WriteString("'")
}
if Conf.Search.IAL {
buf.WriteString(" OR ial REGEXP '")
buf.WriteString(regexp)
buf.WriteString("'")
}
buf.WriteString(" OR tag REGEXP '")
buf.WriteString(regexp)
buf.WriteString("')")
return buf.String()
}
func columnFilter() string {
buf := bytes.Buffer{}
buf.WriteString("{content")
if Conf.Search.Name {
buf.WriteString(" name")
}
if Conf.Search.Alias {
buf.WriteString(" alias")
}
if Conf.Search.Memo {
buf.WriteString(" memo")
}
if Conf.Search.IAL {
buf.WriteString(" ial")
}
buf.WriteString(" tag}")
return buf.String()
}
func stringQuery(query string) string {
if "" == strings.TrimSpace(query) {
return "\"" + query + "\""
}
query = strings.ReplaceAll(query, "\"", "\"\"")
query = strings.ReplaceAll(query, "'", "''")
buf := bytes.Buffer{}
parts := strings.Split(query, " ")
for _, part := range parts {
part = strings.TrimSpace(part)
part = "\"" + part + "\""
buf.WriteString(part)
buf.WriteString(" ")
}
return strings.TrimSpace(buf.String())
}
// markReplaceSpan 用于处理搜索高亮。
func markReplaceSpan(n *ast.Node, unlinks *[]*ast.Node, keywords []string, markSpanDataType string, luteEngine *lute.Lute) bool {
if ast.NodeText == n.Type {
text := n.Content()
escapedText := util.EscapeHTML(text)
escapedKeywords := make([]string, len(keywords))
for i, keyword := range keywords {
escapedKeywords[i] = util.EscapeHTML(keyword)
}
hText := search.EncloseHighlighting(escapedText, escapedKeywords, search.GetMarkSpanStart(markSpanDataType), search.GetMarkSpanEnd(), Conf.Search.CaseSensitive, false)
if hText != escapedText {
text = hText
}
n.Tokens = gulu.Str.ToBytes(text)
if bytes.Contains(n.Tokens, []byte(search.MarkDataType)) {
linkTree := parse.Inline("", n.Tokens, luteEngine.ParseOptions)
var children []*ast.Node
for c := linkTree.Root.FirstChild.FirstChild; nil != c; c = c.Next {
children = append(children, c)
}
for _, c := range children {
n.InsertBefore(c)
}
*unlinks = append(*unlinks, n)
return true
}
} else if ast.NodeTextMark == n.Type {
// 搜索结果高亮支持大部分行级元素 https://github.com/siyuan-note/siyuan/issues/6745
if n.IsTextMarkType("inline-math") || n.IsTextMarkType("inline-memo") {
return false
}
var text string
if n.IsTextMarkType("code") {
// code 在前面的 n.
for i, k := range keywords {
keywords[i] = html.EscapeString(k)
}
text = n.TextMarkTextContent
} else {
text = n.Content()
}
startTag := search.GetMarkSpanStart(markSpanDataType)
text = search.EncloseHighlighting(text, keywords, startTag, search.GetMarkSpanEnd(), Conf.Search.CaseSensitive, false)
if strings.Contains(text, search.MarkDataType) {
dataType := search.GetMarkSpanStart(n.TextMarkType + " " + search.MarkDataType)
text = strings.ReplaceAll(text, startTag, dataType)
tokens := gulu.Str.ToBytes(text)
linkTree := parse.Inline("", tokens, luteEngine.ParseOptions)
var children []*ast.Node
for c := linkTree.Root.FirstChild.FirstChild; nil != c; c = c.Next {
if ast.NodeText == c.Type {
c.Type = ast.NodeTextMark
c.TextMarkType = n.TextMarkType
c.TextMarkTextContent = string(c.Tokens)
if n.IsTextMarkType("a") {
c.TextMarkAHref, c.TextMarkATitle = n.TextMarkAHref, n.TextMarkATitle
} else if treenode.IsBlockRef(n) {
c.TextMarkBlockRefID = n.TextMarkBlockRefID
c.TextMarkBlockRefSubtype = n.TextMarkBlockRefSubtype
} else if treenode.IsFileAnnotationRef(n) {
c.TextMarkFileAnnotationRefID = n.TextMarkFileAnnotationRefID
}
} else if ast.NodeTextMark == c.Type {
if n.IsTextMarkType("a") {
c.TextMarkAHref, c.TextMarkATitle = n.TextMarkAHref, n.TextMarkATitle
} else if treenode.IsBlockRef(n) {
c.TextMarkBlockRefID = n.TextMarkBlockRefID
c.TextMarkBlockRefSubtype = n.TextMarkBlockRefSubtype
} else if treenode.IsFileAnnotationRef(n) {
c.TextMarkFileAnnotationRefID = n.TextMarkFileAnnotationRefID
}
}
children = append(children, c)
if nil != n.Next && ast.NodeKramdownSpanIAL == n.Next.Type {
c.KramdownIAL = n.KramdownIAL
ial := &ast.Node{Type: ast.NodeKramdownSpanIAL, Tokens: n.Next.Tokens}
children = append(children, ial)
}
}
for _, c := range children {
n.InsertBefore(c)
}
*unlinks = append(*unlinks, n)
return true
}
}
return false
}
// markReplaceSpanWithSplit 用于处理虚拟引用和反链提及高亮。
func markReplaceSpanWithSplit(text string, keywords []string, replacementStart, replacementEnd string) (ret string) {
// 虚拟引用和反链提及关键字按最长匹配优先 https://github.com/siyuan-note/siyuan/issues/7465
sort.Slice(keywords, func(i, j int) bool { return len(keywords[i]) > len(keywords[j]) })
tmp := search.EncloseHighlighting(text, keywords, replacementStart, replacementEnd, Conf.Search.CaseSensitive, true)
parts := strings.Split(tmp, replacementEnd)
buf := bytes.Buffer{}
for i := 0; i < len(parts); i++ {
if i >= len(parts)-1 {
buf.WriteString(parts[i])
break
}
if nextPart := parts[i+1]; 0 < len(nextPart) && lex.IsASCIILetter(nextPart[0]) {
// 取消已经高亮的部分
part := strings.ReplaceAll(parts[i], replacementStart, "")
buf.WriteString(part)
continue
}
buf.WriteString(parts[i])
buf.WriteString(replacementEnd)
}
ret = buf.String()
return
}
var (
searchIgnoreLastModified int64
searchIgnore []string
searchIgnoreLock = sync.Mutex{}
)
func getSearchIgnoreLines() (ret []string) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
now := time.Now().UnixMilli()
if now-searchIgnoreLastModified < 30*1000 {
return searchIgnore
}
searchIgnoreLock.Lock()
defer searchIgnoreLock.Unlock()
searchIgnoreLastModified = now
searchIgnorePath := filepath.Join(util.DataDir, ".siyuan", "searchignore")
err := os.MkdirAll(filepath.Dir(searchIgnorePath), 0755)
if err != nil {
return
}
if !gulu.File.IsExist(searchIgnorePath) {
if err = gulu.File.WriteFileSafer(searchIgnorePath, nil, 0644); err != nil {
logging.LogErrorf("create searchignore [%s] failed: %s", searchIgnorePath, err)
return
}
}
data, err := os.ReadFile(searchIgnorePath)
if err != nil {
logging.LogErrorf("read searchignore [%s] failed: %s", searchIgnorePath, err)
return
}
dataStr := string(data)
dataStr = strings.ReplaceAll(dataStr, "\r\n", "\n")
ret = strings.Split(dataStr, "\n")
ret = gulu.Str.RemoveDuplicatedElem(ret)
if 0 < len(ret) && "" == ret[0] {
ret = ret[1:]
}
searchIgnore = nil
for _, line := range ret {
searchIgnore = append(searchIgnore, line)
}
return
}
var (
refSearchIgnoreLastModified int64
refSearchIgnore []string
refSearchIgnoreLock = sync.Mutex{}
)
func getRefSearchIgnoreLines() (ret []string) {
// Support ignore search results https://github.com/siyuan-note/siyuan/issues/10089
now := time.Now().UnixMilli()
if now-refSearchIgnoreLastModified < 30*1000 {
return refSearchIgnore
}
refSearchIgnoreLock.Lock()
defer refSearchIgnoreLock.Unlock()
refSearchIgnoreLastModified = now
searchIgnorePath := filepath.Join(util.DataDir, ".siyuan", "refsearchignore")
err := os.MkdirAll(filepath.Dir(searchIgnorePath), 0755)
if err != nil {
return
}
if !gulu.File.IsExist(searchIgnorePath) {
if err = gulu.File.WriteFileSafer(searchIgnorePath, nil, 0644); err != nil {
logging.LogErrorf("create refsearchignore [%s] failed: %s", searchIgnorePath, err)
return
}
}
data, err := os.ReadFile(searchIgnorePath)
if err != nil {
logging.LogErrorf("read refsearchignore [%s] failed: %s", searchIgnorePath, err)
return
}
dataStr := string(data)
dataStr = strings.ReplaceAll(dataStr, "\r\n", "\n")
ret = strings.Split(dataStr, "\n")
ret = gulu.Str.RemoveDuplicatedElem(ret)
if 0 < len(ret) && "" == ret[0] {
ret = ret[1:]
}
refSearchIgnore = nil
for _, line := range ret {
refSearchIgnore = append(refSearchIgnore, line)
}
return
}
func filterQueryInvisibleChars(query string) string {
query = strings.ReplaceAll(query, " ", "_@full_width_space@_")
query = gulu.Str.RemoveInvisible(query)
query = strings.ReplaceAll(query, "_@full_width_space@_", " ")
return query
}
func replaceCaseInsensitive(input, old, new []byte) []byte {
re := regexp.MustCompile("(?i)" + regexp.QuoteMeta(string(old)))
return []byte(re.ReplaceAllString(string(input), string(new)))
}