Quellcode durchsuchen

:zap: 使用 AC 算法优化虚拟引用匹配性能

Liang Ding vor 2 Jahren
Ursprung
Commit
05c2696e0a
3 geänderte Dateien mit 19 neuen und 7 gelöschten Zeilen
  1. 1 0
      kernel/go.mod
  2. 2 0
      kernel/go.sum
  3. 16 7
      kernel/model/virutalref.go

+ 1 - 0
kernel/go.mod

@@ -65,6 +65,7 @@ require (
 	github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef // indirect
 	github.com/aws/aws-sdk-go v1.44.199 // indirect
 	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184 // indirect
 	github.com/dlclark/regexp2 v1.8.0 // indirect
 	github.com/dsnet/compress v0.0.1 // indirect
 	github.com/gin-contrib/sse v0.1.0 // indirect

+ 2 - 0
kernel/go.sum

@@ -42,6 +42,8 @@ github.com/aws/aws-sdk-go v1.44.199/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
 github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184 h1:8yL+85JpbwrIc6m+7N1iYrjn/22z68jwrTIBOJHNe4k=
+github.com/cloudflare/ahocorasick v0.0.0-20210425175752-730270c3e184/go.mod h1:tGWUZLZp9ajsxUOnHmFFLnqnlKXsCn6GReG4jAD59H0=
 github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be h1:J5BL2kskAlV9ckgEsNQXscjIaLiOYiZ75d4e94E6dcQ=
 github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be/go.mod h1:mk5IQ+Y0ZeO87b858TlA645sVcEcbiX6YqP98kt+7+w=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=

+ 16 - 7
kernel/model/virutalref.go

@@ -27,6 +27,7 @@ import (
 	"github.com/88250/lute"
 	"github.com/88250/lute/ast"
 	"github.com/88250/lute/parse"
+	"github.com/cloudflare/ahocorasick"
 	"github.com/dgraph-io/ristretto"
 	"github.com/siyuan-note/siyuan/kernel/search"
 	"github.com/siyuan-note/siyuan/kernel/sql"
@@ -69,17 +70,25 @@ func putBlockVirtualRefKeywords(blockContent, blockID, docTitle string) (ret []s
 	}
 
 	contentTmp := blockContent
+	keywordsTmp := keywords
 	if !Conf.Search.CaseSensitive {
 		contentTmp = strings.ToLower(blockContent)
-	}
-	for _, keyword := range keywords {
-		keywordTmp := keyword
-		if !Conf.Search.CaseSensitive {
-			keywordTmp = strings.ToLower(keyword)
+		for i, keyword := range keywordsTmp {
+			keywordsTmp[i] = strings.ToLower(keyword)
 		}
+	}
 
-		if strings.Contains(contentTmp, keywordTmp) {
-			ret = append(ret, keyword)
+	if 1024*1024 < len(contentTmp) {
+		matcher := ahocorasick.NewStringMatcher(keywords)
+		hits := matcher.Match([]byte(contentTmp))
+		for _, hit := range hits {
+			ret = append(ret, keywords[hit])
+		}
+	} else {
+		for _, keyword := range keywordsTmp {
+			if strings.Contains(contentTmp, keyword) {
+				ret = append(ret, keyword)
+			}
 		}
 	}