⚡ 使用 AC 算法优化虚拟引用匹配性能
This commit is contained in:
parent
05c2696e0a
commit
eabd1b67ed
3 changed files with 35 additions and 9 deletions
|
@ -58,10 +58,13 @@ require (
|
|||
|
||||
require (
|
||||
dmitri.shuralyov.com/font/woff2 v0.0.0-20180220214647-957792cbbdab // indirect
|
||||
github.com/BobuSumisu/aho-corasick v1.0.3 // indirect
|
||||
github.com/Masterminds/goutils v1.1.1 // indirect
|
||||
github.com/Masterminds/semver/v3 v3.2.0 // indirect
|
||||
github.com/alecthomas/chroma v0.10.0 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.1 // indirect
|
||||
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 // indirect
|
||||
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 // indirect
|
||||
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect
|
||||
github.com/aws/aws-sdk-go v1.44.199 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
|
|
|
@ -16,6 +16,8 @@ github.com/88250/pdfcpu v0.3.13 h1:touMWMZkCGalMIbEg9bxYp7rETM+zwb9hXjwhqi4I7Q=
|
|||
github.com/88250/pdfcpu v0.3.13/go.mod h1:S5YT38L/GCjVjmB4PB84PymA1qfopjEhfhTNQilLpv4=
|
||||
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1 h1:48T899JQDwyyRu9yXHePYlPdHtpJfrJEUGBMH3SMBWY=
|
||||
github.com/88250/vitess-sqlparser v0.0.0-20210205111146-56a2ded2aba1/go.mod h1:U3pckKQIgxxkmZjV5yXQjHdGxQK0o/vEZeZ6cQsxfHw=
|
||||
github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g=
|
||||
github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732 h1:0EDePskeF4vNFCk70ATaFHQzjmwXsk+VImnMJttecNU=
|
||||
github.com/ConradIrwin/font v0.0.0-20210318200717-ce8d41cc0732/go.mod h1:krTLO7JWu6g8RMxG8sl+T1Hf8W93XQacBKJmqFZ2MFY=
|
||||
|
@ -33,6 +35,10 @@ github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbf
|
|||
github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
|
||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0 h1:onfun1RA+KcxaMk1lfrRnwCd1UUuOjJM/lri5eM1qMs=
|
||||
github.com/anknown/ahocorasick v0.0.0-20190904063843-d75dbd5169c0/go.mod h1:4yg+jNTYlDEzBjhGS96v+zjyA3lfXlFd5CiTLIkPBLI=
|
||||
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6 h1:HblK3eJHq54yET63qPCTJnks3loDse5xRmmqHgHzwoI=
|
||||
github.com/anknown/darts v0.0.0-20151216065714-83ff685239e6/go.mod h1:pbiaLIeYLUbgMY1kwEAdwO6UKD5ZNwdPGQlwokS9fe8=
|
||||
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
|
||||
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
|
||||
github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM=
|
||||
|
|
|
@ -18,6 +18,8 @@ package model
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
goahocorasick "github.com/anknown/ahocorasick"
|
||||
"github.com/siyuan-note/logging"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
@ -27,7 +29,6 @@ import (
|
|||
"github.com/88250/lute"
|
||||
"github.com/88250/lute/ast"
|
||||
"github.com/88250/lute/parse"
|
||||
"github.com/cloudflare/ahocorasick"
|
||||
"github.com/dgraph-io/ristretto"
|
||||
"github.com/siyuan-note/siyuan/kernel/search"
|
||||
"github.com/siyuan-note/siyuan/kernel/sql"
|
||||
|
@ -70,22 +71,38 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s
|
|||
}
|
||||
|
||||
contentTmp := blockContent
|
||||
keywordsTmp := keywords
|
||||
var keywordsTmp [][]rune
|
||||
if !Conf.Search.CaseSensitive {
|
||||
contentTmp = strings.ToLower(blockContent)
|
||||
for i, keyword := range keywordsTmp {
|
||||
keywordsTmp[i] = strings.ToLower(keyword)
|
||||
for _, keyword := range keywords {
|
||||
keywordsTmp = append(keywordsTmp, []rune(strings.ToLower(keyword)))
|
||||
}
|
||||
} else {
|
||||
for _, keyword := range keywords {
|
||||
keywordsTmp = append(keywordsTmp, []rune(keyword))
|
||||
}
|
||||
}
|
||||
|
||||
if 1024*1024 < len(contentTmp) {
|
||||
matcher := ahocorasick.NewStringMatcher(keywords)
|
||||
hits := matcher.Match([]byte(contentTmp))
|
||||
for _, hit := range hits {
|
||||
ret = append(ret, keywords[hit])
|
||||
m := goahocorasick.Machine{}
|
||||
buildErr := m.Build(keywordsTmp)
|
||||
if nil != buildErr {
|
||||
logging.LogWarnf("build virtual ref keywords AC matcher failed: %s", buildErr)
|
||||
for _, keywordRunes := range keywordsTmp {
|
||||
keyword := string(keywordRunes)
|
||||
if strings.Contains(contentTmp, keyword) {
|
||||
ret = append(ret, keyword)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
hits := m.MultiPatternSearch([]rune(contentTmp), false)
|
||||
for _, hit := range hits {
|
||||
ret = append(ret, string(hit.Word))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, keyword := range keywordsTmp {
|
||||
for _, keywordRunes := range keywordsTmp {
|
||||
keyword := string(keywordRunes)
|
||||
if strings.Contains(contentTmp, keyword) {
|
||||
ret = append(ret, keyword)
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue