513 lines
15 KiB
Go
513 lines
15 KiB
Go
// SiYuan - Build Your Eternal Digital Garden
|
|
// Copyright (c) 2020-present, b3log.org
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
package model
|
|
|
|
import (
|
|
"bytes"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/88250/gulu"
|
|
"github.com/88250/lute/ast"
|
|
"github.com/88250/lute/html"
|
|
"github.com/88250/lute/parse"
|
|
"github.com/jinzhu/copier"
|
|
"github.com/siyuan-note/siyuan/kernel/conf"
|
|
"github.com/siyuan-note/siyuan/kernel/search"
|
|
"github.com/siyuan-note/siyuan/kernel/sql"
|
|
"github.com/siyuan-note/siyuan/kernel/treenode"
|
|
"github.com/siyuan-note/siyuan/kernel/util"
|
|
"github.com/xrash/smetrics"
|
|
)
|
|
|
|
func SearchEmbedBlock(stmt string, excludeIDs []string, headingMode int) (ret []*Block) {
|
|
WaitForWritingFiles()
|
|
return searchEmbedBlock(stmt, excludeIDs, headingMode)
|
|
}
|
|
|
|
func searchEmbedBlock(stmt string, excludeIDs []string, headingMode int) (ret []*Block) {
|
|
sqlBlocks := sql.SelectBlocksRawStmtNoParse(stmt, Conf.Search.Limit)
|
|
var tmp []*sql.Block
|
|
for _, b := range sqlBlocks {
|
|
if !gulu.Str.Contains(b.ID, excludeIDs) {
|
|
tmp = append(tmp, b)
|
|
}
|
|
}
|
|
sqlBlocks = tmp
|
|
for _, sb := range sqlBlocks {
|
|
block := getBlockRendered(sb.ID, headingMode)
|
|
if nil == block {
|
|
continue
|
|
}
|
|
ret = append(ret, block)
|
|
}
|
|
|
|
if 1 > len(ret) {
|
|
ret = []*Block{}
|
|
}
|
|
return
|
|
}
|
|
|
|
func SearchRefBlock(id, rootID, keyword string, beforeLen int) (ret []*Block, newDoc bool) {
|
|
if "" == keyword {
|
|
// 查询为空时默认的块引排序规则按最近使用优先 https://github.com/siyuan-note/siyuan/issues/3218
|
|
refs := sql.QueryRefsRecent()
|
|
for _, ref := range refs {
|
|
sqlBlock := sql.GetBlock(ref.DefBlockID)
|
|
block := fromSQLBlock(sqlBlock, "", beforeLen)
|
|
if nil == block {
|
|
continue
|
|
}
|
|
block.Content = maxContent(block.Content, Conf.Editor.BlockRefDynamicAnchorTextMaxLen)
|
|
block.RefText = block.Content
|
|
if block.IsContainerBlock() {
|
|
block.RefText = block.FContent // `((` 引用列表项时使用第一个子块作为动态锚文本 https://github.com/siyuan-note/siyuan/issues/4536
|
|
}
|
|
ret = append(ret, block)
|
|
}
|
|
if 1 > len(ret) {
|
|
ret = []*Block{}
|
|
}
|
|
return
|
|
}
|
|
|
|
ret = fullTextSearchRefBlock(keyword, beforeLen)
|
|
tmp := ret[:0]
|
|
trees := map[string]*parse.Tree{}
|
|
for _, b := range ret {
|
|
hitFirstChildID := false
|
|
b.RefText = b.Content
|
|
if b.IsContainerBlock() {
|
|
b.RefText = b.FContent // `((` 引用列表项时使用第一个子块作为动态锚文本 https://github.com/siyuan-note/siyuan/issues/4536
|
|
|
|
// `((` 引用候选中排除当前块的父块 https://github.com/siyuan-note/siyuan/issues/4538
|
|
tree := trees[b.RootID]
|
|
if nil == tree {
|
|
tree, _ = loadTreeByBlockID(b.RootID)
|
|
trees[b.RootID] = tree
|
|
}
|
|
if nil != tree {
|
|
bNode := treenode.GetNodeInTree(tree, b.ID)
|
|
if fc := treenode.FirstLeafBlock(bNode); nil != fc && fc.ID == id {
|
|
hitFirstChildID = true
|
|
}
|
|
}
|
|
}
|
|
|
|
if b.ID != id && !hitFirstChildID && b.ID != rootID {
|
|
b.Content = maxContent(b.Content, Conf.Editor.BlockRefDynamicAnchorTextMaxLen)
|
|
tmp = append(tmp, b)
|
|
}
|
|
}
|
|
ret = tmp
|
|
|
|
if "" != keyword {
|
|
if block := treenode.GetBlockTree(id); nil != block {
|
|
p := path.Join(block.HPath, keyword)
|
|
newDoc = nil == treenode.GetBlockTreeRootByHPath(block.BoxID, p)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
func FindReplace(keyword, replacement string, ids []string) (err error) {
|
|
keyword = strings.Trim(keyword, "\"") // FTS 字符串需要去除双引号
|
|
if keyword == replacement {
|
|
return
|
|
}
|
|
|
|
ids = util.RemoveDuplicatedElem(ids)
|
|
for _, id := range ids {
|
|
var tree *parse.Tree
|
|
tree, err = loadTreeByBlockID(id)
|
|
if nil != err {
|
|
return
|
|
}
|
|
|
|
node := treenode.GetNodeInTree(tree, id)
|
|
if nil == node {
|
|
return
|
|
}
|
|
|
|
ast.Walk(node, func(n *ast.Node, entering bool) ast.WalkStatus {
|
|
if !entering {
|
|
return ast.WalkContinue
|
|
}
|
|
|
|
switch n.Type {
|
|
case ast.NodeDocument:
|
|
title := n.IALAttr("title")
|
|
if strings.Contains(title, keyword) {
|
|
n.SetIALAttr("title", strings.ReplaceAll(title, keyword, replacement))
|
|
}
|
|
case ast.NodeText, ast.NodeLinkText, ast.NodeLinkTitle, ast.NodeCodeSpanContent, ast.NodeCodeBlockCode, ast.NodeInlineMathContent, ast.NodeMathBlockContent:
|
|
if bytes.Contains(n.Tokens, []byte(keyword)) {
|
|
n.Tokens = bytes.ReplaceAll(n.Tokens, []byte(keyword), []byte(replacement))
|
|
}
|
|
}
|
|
return ast.WalkContinue
|
|
})
|
|
|
|
if err = writeJSONQueue(tree); nil != err {
|
|
return
|
|
}
|
|
}
|
|
|
|
WaitForWritingFiles()
|
|
if 1 < len(ids) {
|
|
go func() {
|
|
time.Sleep(time.Second)
|
|
util.ReloadUI()
|
|
}()
|
|
}
|
|
return
|
|
}
|
|
|
|
func FullTextSearchBlock(query, box, path string, types map[string]bool, querySyntax bool) (ret []*Block) {
|
|
query = strings.TrimSpace(query)
|
|
if queryStrLower := strings.ToLower(query); strings.Contains(queryStrLower, "select ") && strings.Contains(queryStrLower, " * ") && strings.Contains(queryStrLower, " from ") {
|
|
ret = searchBySQL(query, 12)
|
|
} else {
|
|
filter := searchFilter(types)
|
|
ret = fullTextSearch(query, box, path, filter, 12, querySyntax)
|
|
}
|
|
return
|
|
}
|
|
|
|
func searchFilter(types map[string]bool) string {
|
|
s := conf.NewSearch()
|
|
if err := copier.Copy(s, Conf.Search); nil != err {
|
|
util.LogErrorf("copy search conf failed: %s", err)
|
|
}
|
|
if nil != types {
|
|
s.Document = types["document"]
|
|
s.Heading = types["heading"]
|
|
s.List = types["list"]
|
|
s.ListItem = types["listItem"]
|
|
s.CodeBlock = types["codeBlock"]
|
|
s.MathBlock = types["mathBlock"]
|
|
s.Table = types["table"]
|
|
s.Blockquote = types["blockquote"]
|
|
s.SuperBlock = types["superBlock"]
|
|
s.Paragraph = types["paragraph"]
|
|
s.HTMLBlock = types["htmlBlock"]
|
|
} else {
|
|
s.Document = Conf.Search.Document
|
|
s.Heading = Conf.Search.Heading
|
|
s.List = Conf.Search.List
|
|
s.ListItem = Conf.Search.ListItem
|
|
s.CodeBlock = Conf.Search.CodeBlock
|
|
s.MathBlock = Conf.Search.MathBlock
|
|
s.Table = Conf.Search.Table
|
|
s.Blockquote = Conf.Search.Blockquote
|
|
s.SuperBlock = Conf.Search.SuperBlock
|
|
s.Paragraph = Conf.Search.Paragraph
|
|
s.HTMLBlock = Conf.Search.HTMLBlock
|
|
}
|
|
return s.TypeFilter()
|
|
}
|
|
|
|
func searchBySQL(stmt string, beforeLen int) (ret []*Block) {
|
|
stmt = util.RemoveInvisible(stmt)
|
|
blocks := sql.SelectBlocksRawStmt(stmt, Conf.Search.Limit)
|
|
ret = fromSQLBlocks(&blocks, "", beforeLen)
|
|
if 1 > len(ret) {
|
|
ret = []*Block{}
|
|
}
|
|
return
|
|
}
|
|
|
|
func fullTextSearchRefBlock(keyword string, beforeLen int) (ret []*Block) {
|
|
keyword = util.RemoveInvisible(keyword)
|
|
|
|
if util.IsIDPattern(keyword) {
|
|
ret = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+keyword+"'", 12)
|
|
return
|
|
}
|
|
|
|
quotedKeyword := stringQuery(keyword)
|
|
table := "blocks_fts" // 大小写敏感
|
|
if !Conf.Search.CaseSensitive {
|
|
table = "blocks_fts_case_insensitive"
|
|
}
|
|
|
|
projections := "id, parent_id, root_id, hash, box, path, " +
|
|
"highlight(" + table + ", 6, '__@mark__', '__mark@__') AS hpath, " +
|
|
"highlight(" + table + ", 7, '__@mark__', '__mark@__') AS name, " +
|
|
"highlight(" + table + ", 8, '__@mark__', '__mark@__') AS alias, " +
|
|
"highlight(" + table + ", 9, '__@mark__', '__mark@__') AS memo, " +
|
|
"tag, " +
|
|
"highlight(" + table + ", 11, '__@mark__', '__mark@__') AS content, " +
|
|
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
|
|
stmt := "SELECT " + projections + " FROM " + table + " WHERE " + table + " MATCH '" + columnFilter() + ":(" + quotedKeyword + ")' AND type IN " + Conf.Search.TypeFilter()
|
|
orderBy := ` order by case
|
|
when name = '${keyword}' then 10
|
|
when alias = '${keyword}' then 20
|
|
when memo = '${keyword}' then 30
|
|
when content = '${keyword}' and type = 'd' then 40
|
|
when content LIKE '%${keyword}%' and type = 'd' then 41
|
|
when name LIKE '%${keyword}%' then 50
|
|
when alias LIKE '%${keyword}%' then 60
|
|
when content = '${keyword}' and type = 'h' then 70
|
|
when content LIKE '%${keyword}%' and type = 'h' then 71
|
|
when fcontent = '${keyword}' and type = 'i' then 80
|
|
when fcontent LIKE '%${keyword}%' and type = 'i' then 81
|
|
when memo LIKE '%${keyword}%' then 90
|
|
when content LIKE '%${keyword}%' and type != 'i' and type != 'l' then 100
|
|
else 65535 end ASC, sort ASC, length ASC`
|
|
orderBy = strings.ReplaceAll(orderBy, "${keyword}", keyword)
|
|
stmt += orderBy + " LIMIT " + strconv.Itoa(Conf.Search.Limit)
|
|
blocks := sql.SelectBlocksRawStmt(stmt, Conf.Search.Limit)
|
|
ret = fromSQLBlocks(&blocks, "", beforeLen)
|
|
if 1 > len(ret) {
|
|
ret = []*Block{}
|
|
}
|
|
return
|
|
}
|
|
|
|
func fullTextSearch(query, box, path, filter string, beforeLen int, querySyntax bool) (ret []*Block) {
|
|
query = util.RemoveInvisible(query)
|
|
if util.IsIDPattern(query) {
|
|
ret = searchBySQL("SELECT * FROM `blocks` WHERE `id` = '"+query+"'", beforeLen)
|
|
return
|
|
}
|
|
|
|
if !querySyntax {
|
|
query = stringQuery(query)
|
|
}
|
|
|
|
table := "blocks_fts" // 大小写敏感
|
|
if !Conf.Search.CaseSensitive {
|
|
table = "blocks_fts_case_insensitive"
|
|
}
|
|
projections := "id, parent_id, root_id, hash, box, path, " +
|
|
"highlight(" + table + ", 6, '__@mark__', '__mark@__') AS hpath, " +
|
|
"highlight(" + table + ", 7, '__@mark__', '__mark@__') AS name, " +
|
|
"highlight(" + table + ", 8, '__@mark__', '__mark@__') AS alias, " +
|
|
"highlight(" + table + ", 9, '__@mark__', '__mark@__') AS memo, " +
|
|
"tag, " +
|
|
"highlight(" + table + ", 11, '__@mark__', '__mark@__') AS content, " +
|
|
"fcontent, markdown, length, type, subtype, ial, sort, created, updated"
|
|
stmt := "SELECT " + projections + " FROM " + table + " WHERE " + table + " MATCH '" + columnFilter() + ":(" + query + ")' AND type IN " + filter
|
|
if "" != box {
|
|
stmt += " AND box = '" + box + "'"
|
|
}
|
|
if "" != path {
|
|
stmt += " AND path LIKE '" + path + "%'"
|
|
}
|
|
stmt += " ORDER BY sort ASC, rank ASC LIMIT " + strconv.Itoa(Conf.Search.Limit)
|
|
blocks := sql.SelectBlocksRawStmt(stmt, Conf.Search.Limit)
|
|
ret = fromSQLBlocks(&blocks, "", beforeLen)
|
|
if 1 > len(ret) {
|
|
ret = []*Block{}
|
|
}
|
|
return
|
|
}
|
|
|
|
func query2Stmt(queryStr string) (ret string) {
|
|
buf := bytes.Buffer{}
|
|
if util.IsIDPattern(queryStr) {
|
|
buf.WriteString("id = '" + queryStr + "'")
|
|
} else {
|
|
var tags []string
|
|
luteEngine := NewLute()
|
|
t := parse.Inline("", []byte(queryStr), luteEngine.ParseOptions)
|
|
ast.Walk(t.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
|
|
if !entering {
|
|
return ast.WalkContinue
|
|
}
|
|
if ast.NodeTag == n.Type {
|
|
tags = append(tags, n.Text())
|
|
}
|
|
return ast.WalkContinue
|
|
})
|
|
|
|
for _, tag := range tags {
|
|
queryStr = strings.ReplaceAll(queryStr, "#"+tag+"#", "")
|
|
}
|
|
parts := strings.Split(queryStr, " ")
|
|
|
|
for i, part := range parts {
|
|
if "" == part {
|
|
continue
|
|
}
|
|
part = strings.ReplaceAll(part, "'", "''")
|
|
buf.WriteString("(content LIKE '%" + part + "%'")
|
|
buf.WriteString(Conf.Search.NAMFilter(part))
|
|
buf.WriteString(")")
|
|
if i < len(parts)-1 {
|
|
buf.WriteString(" AND ")
|
|
}
|
|
}
|
|
|
|
if 0 < len(tags) {
|
|
if 0 < buf.Len() {
|
|
buf.WriteString(" OR ")
|
|
}
|
|
for i, tag := range tags {
|
|
buf.WriteString("(content LIKE '%#" + tag + "#%')")
|
|
if i < len(tags)-1 {
|
|
buf.WriteString(" AND ")
|
|
}
|
|
}
|
|
buf.WriteString(" OR ")
|
|
for i, tag := range tags {
|
|
buf.WriteString("ial LIKE '%tags=\"%" + tag + "%\"%'")
|
|
if i < len(tags)-1 {
|
|
buf.WriteString(" AND ")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if 1 > buf.Len() {
|
|
buf.WriteString("1=1")
|
|
}
|
|
ret = buf.String()
|
|
return
|
|
}
|
|
|
|
func markSearch(text string, keyword string, beforeLen int) (pos int, marked string, score float64) {
|
|
if 0 == len(keyword) {
|
|
marked = text
|
|
if maxLen := 5120; maxLen < utf8.RuneCountInString(marked) {
|
|
marked = gulu.Str.SubStr(marked, maxLen) + "..."
|
|
}
|
|
marked = html.EscapeString(marked)
|
|
marked = strings.ReplaceAll(marked, "__@mark__", "<mark>")
|
|
marked = strings.ReplaceAll(marked, "__mark@__", "</mark>")
|
|
return
|
|
}
|
|
|
|
pos, marked = search.MarkText(text, keyword, beforeLen, Conf.Search.CaseSensitive)
|
|
if -1 < pos {
|
|
if 0 == pos {
|
|
score = 1
|
|
}
|
|
score += float64(strings.Count(marked, "<mark>"))
|
|
winkler := smetrics.JaroWinkler(text, keyword, 0.7, 4)
|
|
score += winkler
|
|
}
|
|
score = -score // 分越小排序越靠前
|
|
return
|
|
}
|
|
|
|
func fromSQLBlocks(sqlBlocks *[]*sql.Block, terms string, beforeLen int) (ret []*Block) {
|
|
for _, sqlBlock := range *sqlBlocks {
|
|
ret = append(ret, fromSQLBlock(sqlBlock, terms, beforeLen))
|
|
}
|
|
return
|
|
}
|
|
|
|
func fromSQLBlock(sqlBlock *sql.Block, terms string, beforeLen int) (block *Block) {
|
|
if nil == sqlBlock {
|
|
return
|
|
}
|
|
|
|
id := sqlBlock.ID
|
|
content := sqlBlock.Content
|
|
p := sqlBlock.Path
|
|
|
|
_, content, _ = markSearch(content, terms, beforeLen)
|
|
markdown := maxContent(sqlBlock.Markdown, 5120)
|
|
content = maxContent(content, 5120)
|
|
|
|
block = &Block{
|
|
Box: sqlBlock.Box,
|
|
Path: p,
|
|
ID: id,
|
|
RootID: sqlBlock.RootID,
|
|
ParentID: sqlBlock.ParentID,
|
|
Alias: sqlBlock.Alias,
|
|
Name: sqlBlock.Name,
|
|
Memo: sqlBlock.Memo,
|
|
Tag: sqlBlock.Tag,
|
|
Content: content,
|
|
FContent: sqlBlock.FContent,
|
|
Markdown: markdown,
|
|
Type: treenode.FromAbbrType(sqlBlock.Type),
|
|
SubType: sqlBlock.SubType,
|
|
}
|
|
if "" != sqlBlock.IAL {
|
|
block.IAL = map[string]string{}
|
|
ialStr := strings.TrimPrefix(sqlBlock.IAL, "{:")
|
|
ialStr = strings.TrimSuffix(ialStr, "}")
|
|
ial := parse.Tokens2IAL([]byte(ialStr))
|
|
for _, kv := range ial {
|
|
block.IAL[kv[0]] = kv[1]
|
|
}
|
|
}
|
|
|
|
_, hPath, _ := markSearch(sqlBlock.HPath, terms, 18)
|
|
if !strings.HasPrefix(hPath, "/") {
|
|
hPath = "/" + hPath
|
|
}
|
|
block.HPath = hPath
|
|
|
|
if "" != block.Name {
|
|
_, block.Name, _ = markSearch(block.Name, terms, 256)
|
|
}
|
|
if "" != block.Alias {
|
|
_, block.Alias, _ = markSearch(block.Alias, terms, 256)
|
|
}
|
|
if "" != block.Memo {
|
|
_, block.Memo, _ = markSearch(block.Memo, terms, 256)
|
|
}
|
|
return
|
|
}
|
|
|
|
func maxContent(content string, maxLen int) string {
|
|
if maxLen < utf8.RuneCountInString(content) {
|
|
return gulu.Str.SubStr(content, maxLen) + "..."
|
|
}
|
|
return content
|
|
}
|
|
|
|
func columnFilter() string {
|
|
buf := bytes.Buffer{}
|
|
buf.WriteString("{content")
|
|
if Conf.Search.Name {
|
|
buf.WriteString(" name")
|
|
}
|
|
if Conf.Search.Alias {
|
|
buf.WriteString(" alias")
|
|
}
|
|
if Conf.Search.Memo {
|
|
buf.WriteString(" memo")
|
|
}
|
|
if Conf.Search.Custom {
|
|
buf.WriteString(" ial")
|
|
}
|
|
buf.WriteString(" tag}")
|
|
return buf.String()
|
|
}
|
|
|
|
func stringQuery(query string) string {
|
|
query = strings.ReplaceAll(query, "\"", "\"\"")
|
|
|
|
buf := bytes.Buffer{}
|
|
parts := strings.Split(query, " ")
|
|
for _, part := range parts {
|
|
part = strings.TrimSpace(part)
|
|
part = "\"" + part + "\""
|
|
buf.WriteString(part)
|
|
buf.WriteString(" ")
|
|
}
|
|
return strings.TrimSpace(buf.String())
|
|
}
|