使用 AC 算法优化虚拟引用匹配性能

This commit is contained in:
Liang Ding 2023-02-17 14:43:03 +08:00
parent 05c2696e0a
commit eabd1b67ed
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
3 changed files with 35 additions and 9 deletions

View file

@ -58,10 +58,13 @@ require (
require (
dmitri.shuralyov.com/font/woff2 v0.0.0-20180220214647-957792cbbdab // indirect
github.com/BobuSumisu/aho-corasick v1.0.3 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/alecthomas/chroma v0.10.0 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 // indirect
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 // indirect
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect
github.com/aws/aws-sdk-go v1.44.199 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect

View file

@ -16,6 +16,8 @@ github.com/88250/pdfcpu v0.3.13 h1:touMWMZkCGalMIbEg9bxYp7rETM+zwb9hXjwhqi4I7Q=
github.com/88250/pdfcpu v0.3.13/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4=
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY=
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw=
github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 h1:0EDePskeF4vNFCk70ATaFHQzjmwXsk+VImnMJttecNU=
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732/go.mod h1:krTLO7JWu6g8RMxG8sl+T1Hf8W93XQacBKJmqFZ2MFY=
@ -33,6 +35,10 @@ github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbf
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 h1:onfun1RA+KcxaMk1lfrRnwCd1UUuOjJM/lri5eM1qMs=
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0/go.mod h1:4yg+jNTYlDEzBjhGS96v+zjyA3lfXlFd5CiTLIkPBLI=
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 h1:HblK3eJHq54yET63qPCTJnks3loDse5xRmmqHgHzwoI=
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6/go.mod h1:pbiaLIeYLUbgMY1kwEAdwO6UKD5ZNwdPGQlwokS9fe8=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM=

View file

@ -18,6 +18,8 @@ package model
import (
"bytes"
goahocorasick "github.com/anknown/ahocorasick"
"github.com/siyuan-note/logging"
"regexp"
"sort"
"strings"
@ -27,7 +29,6 @@ import (
"github.com/88250/lute"
"github.com/88250/lute/ast"
"github.com/88250/lute/parse"
"github.com/cloudflare/ahocorasick"
"github.com/dgraph-io/ristretto"
"github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql"
@ -70,22 +71,38 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s
}
contentTmp := blockContent
keywordsTmp := keywords
var keywordsTmp [][]rune
if !Conf.Search.CaseSensitive {
contentTmp = strings.ToLower(blockContent)
for i, keyword := range keywordsTmp {
keywordsTmp[i] = strings.ToLower(keyword)
for _, keyword := range keywords {
keywordsTmp = append(keywordsTmp, []rune(strings.ToLower(keyword)))
}
} else {
for _, keyword := range keywords {
keywordsTmp = append(keywordsTmp, []rune(keyword))
}
}
if 1024*1024 < len(contentTmp) {
matcher := ahocorasick.NewStringMatcher(keywords)
hits := matcher.Match([]byte(contentTmp))
for _, hit := range hits {
ret = append(ret, keywords[hit])
m := goahocorasick.Machine{}
buildErr := m.Build(keywordsTmp)
if nil != buildErr {
logging.LogWarnf("build virtual ref keywords AC matcher failed: %s", buildErr)
for _, keywordRunes := range keywordsTmp {
keyword := string(keywordRunes)
if strings.Contains(contentTmp, keyword) {
ret = append(ret, keyword)
}
}
} else {
hits := m.MultiPatternSearch([]rune(contentTmp), false)
for _, hit := range hits {
ret = append(ret, string(hit.Word))
}
}
} else {
for _, keyword := range keywordsTmp {
for _, keywordRunes := range keywordsTmp {
keyword := string(keywordRunes)
if strings.Contains(contentTmp, keyword) {
ret = append(ret, keyword)
}