使用 AC（Aho-Corasick）算法优化虚拟引用匹配性能

This commit is contained in:
Liang Ding 2023-02-17 14:23:06 +08:00
parent c61e0ef3f2
commit 05c2696e0a
No known key found for this signature in database
GPG key ID: 136F30F901A2231D
3 changed files with 19 additions and 7 deletions

View file

@ -65,6 +65,7 @@ require (
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect
github.com/aws/aws-sdk-go v1.44.199 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184 // indirect
github.com/dlclark/regexp2 v1.8.0 // indirect
github.com/dsnet/compress v0.0.1 // indirect
github.com/gin-contrib/sse v0.1.0 // indirect

View file

@ -42,6 +42,8 @@ github.com/aws/aws-sdk-go v1.44.199/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184 h1:8yL+85JpbwrIc6m+7N1iYrjn/22z68jwrTIBOJHNe4k=
github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184/go.mod h1:tGWUZLZp9ajsxUOnHmFFLnqnlKXsCn6GReG4jAD59H0=
github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be h1:J5BL2kskAlV9ckgEsNQXscjIaLiOYiZ75d4e94E6dcQ=
github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be/go.mod h1:mk5IQ+Y0ZeO87b858TlA645sVcEcbiX6YqP98kt+7+w=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=

View file

@ -27,6 +27,7 @@ import (
"github.com/88250/lute"
"github.com/88250/lute/ast"
"github.com/88250/lute/parse"
"github.com/cloudflare/ahocorasick"
"github.com/dgraph-io/ristretto"
"github.com/siyuan-note/siyuan/kernel/search"
"github.com/siyuan-note/siyuan/kernel/sql"
@ -69,17 +70,25 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s
}
contentTmp := blockContent
keywordsTmp := keywords
if !Conf.Search.CaseSensitive {
contentTmp = strings.ToLower(blockContent)
}
for _, keyword := range keywords {
keywordTmp := keyword
if !Conf.Search.CaseSensitive {
keywordTmp = strings.ToLower(keyword)
for i, keyword := range keywordsTmp {
keywordsTmp[i] = strings.ToLower(keyword)
}
}
if strings.Contains(contentTmp, keywordTmp) {
ret = append(ret, keyword)
if 1024*1024 < len(contentTmp) {
matcher := ahocorasick.NewStringMatcher(keywords)
hits := matcher.Match([]byte(contentTmp))
for _, hit := range hits {
ret = append(ret, keywords[hit])
}
} else {
for _, keyword := range keywordsTmp {
if strings.Contains(contentTmp, keyword) {
ret = append(ret, keyword)
}
}
}