瀏覽代碼

Add experimental support for re2 (#2138)

blotus 2 年之前
父節點
當前提交
772d5b5c32
共有 9 個文件被更改,包括 71 次插入24 次删除
  1. 1 1
      .github/workflows/bats-hub.yml
  2. 5 3
      go.mod
  3. 12 3
      go.sum
  4. 35 12
      pkg/exprhelpers/helpers.go
  5. 11 0
      pkg/fflag/crowdsec.go
  6. 2 2
      pkg/parser/node.go
  7. 2 2
      pkg/parser/parsing_test.go
  8. 2 0
      pkg/parser/unix_parser.go
  9. 1 1
      pkg/types/grok_pattern.go

+ 1 - 1
.github/workflows/bats-hub.yml

@@ -19,7 +19,7 @@ jobs:
 
     name: "Build + tests"
     runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 30
     steps:
 
     - name: "Force machineid"

+ 5 - 3
go.mod

@@ -15,7 +15,7 @@ require (
 	github.com/confluentinc/bincover v0.2.0
 	github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
 	github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26
-	github.com/crowdsecurity/grokky v0.1.0
+	github.com/crowdsecurity/grokky v0.2.1
 	github.com/crowdsecurity/machineid v1.0.2
 	github.com/davecgh/go-spew v1.1.1
 	github.com/dghubble/sling v1.3.0
@@ -67,6 +67,7 @@ require (
 	github.com/Masterminds/semver v1.5.0
 	github.com/Masterminds/sprig/v3 v3.2.2
 	github.com/aquasecurity/table v1.8.0
+	github.com/aws/aws-lambda-go v1.38.0
 	github.com/beevik/etree v1.1.0
 	github.com/blackfireio/osinfo v1.0.3
 	github.com/bluele/gcache v0.0.2
@@ -98,7 +99,6 @@ require (
 	github.com/ahmetalpbalkan/dlog v0.0.0-20170105205344-4fb5f8204f26 // indirect
 	github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect
-	github.com/aws/aws-lambda-go v1.38.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
 	github.com/containerd/containerd v1.6.18 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
@@ -143,6 +143,7 @@ require (
 	github.com/klauspost/compress v1.15.7 // indirect
 	github.com/leodido/go-urn v1.2.1 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
+	github.com/magefile/mage v1.14.0 // indirect
 	github.com/mailru/easyjson v0.7.6 // indirect
 	github.com/mattn/go-colorable v0.1.12 // indirect
 	github.com/mattn/go-runewidth v0.0.13 // indirect
@@ -170,12 +171,13 @@ require (
 	github.com/shopspring/decimal v1.2.0 // indirect
 	github.com/spf13/cast v1.3.1 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/tetratelabs/wazero v1.0.0-rc.2 // indirect
 	github.com/tidwall/gjson v1.13.0 // indirect
 	github.com/tklauser/go-sysconf v0.3.11 // indirect
 	github.com/tklauser/numcpus v0.6.0 // indirect
 	github.com/ugorji/go/codec v1.2.6 // indirect
-	github.com/vjeantet/grok v1.0.1 // indirect
 	github.com/vmihailenco/msgpack v4.0.4+incompatible // indirect
+	github.com/wasilibs/go-re2 v0.2.1 // indirect
 	github.com/yusufpapurcu/wmi v1.2.2 // indirect
 	github.com/zclconf/go-cty v1.8.0 // indirect
 	go.mongodb.org/mongo-driver v1.9.0 // indirect

+ 12 - 3
go.sum

@@ -181,8 +181,10 @@ github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
 github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26 h1:r97WNVC30Uen+7WnLs4xDScS/Ex988+id2k6mDf8psU=
 github.com/crowdsecurity/dlog v0.0.0-20170105205344-4fb5f8204f26/go.mod h1:zpv7r+7KXwgVUZnUNjyP22zc/D7LKjyoY02weH2RBbk=
-github.com/crowdsecurity/grokky v0.1.0 h1:jLUzZd3vKxYrM4hQ8n5HWLfvs5ag4UP08eT9OTekI4U=
-github.com/crowdsecurity/grokky v0.1.0/go.mod h1:fx5UYUYAFIrOUNAkFCUOM2wJcsp9EWSQE9R0/9kaFJg=
+github.com/crowdsecurity/grokky v0.2.0 h1:8u3dCU1kqSlOu2phP6S+HF2OTohZ/JBGcZCWu1dhnF0=
+github.com/crowdsecurity/grokky v0.2.0/go.mod h1:qdyzFHtnY6nmdYZJFHVbdms2HMFLiEXzmqq5tCUmiGE=
+github.com/crowdsecurity/grokky v0.2.1 h1:t4VYnDlAd0RjDM2SlILalbwfCrQxtJSMGdQOR0zwkE4=
+github.com/crowdsecurity/grokky v0.2.1/go.mod h1:33usDIYzGDsgX1kHAThCbseso6JuWNJXOzRQDGXHtWM=
 github.com/crowdsecurity/machineid v1.0.2 h1:wpkpsUghJF8Khtmn/tg6GxgdhLA1Xflerh5lirI+bdc=
 github.com/crowdsecurity/machineid v1.0.2/go.mod h1:XWUSlnS0R0+u/JK5ulidwlbceNT3ZOCKteoVQEn6Luo=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -648,6 +650,8 @@ github.com/lithammer/dedent v1.1.0 h1:VNzHMVCBNG1j0fh3OrsFRkVUwStdDArbgBWoPAffkt
 github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
 github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
 github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
+github.com/magefile/mage v1.14.0 h1:6QDX3g6z1YvJ4olPhT1wksUcSa/V0a1B+pJb73fBjyo=
+github.com/magefile/mage v1.14.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
 github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
 github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
@@ -892,6 +896,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO
 github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
+github.com/tetratelabs/wazero v1.0.0-rc.2 h1:OA3UUynnoqxrjCQ94mpAtdO4/oMxFQVNL2BXDMOc66Q=
+github.com/tetratelabs/wazero v1.0.0-rc.2/go.mod h1:wYx2gNRg8/WihJfSDxA1TIL8H+GkfLYm+bIfbblu9VQ=
 github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c h1:HelZ2kAFadG0La9d+4htN4HzQ68Bm2iM9qKMSMES6xg=
 github.com/texttheater/golang-levenshtein/levenshtein v0.0.0-20200805054039-cae8b0eaed6c/go.mod h1:JlzghshsemAMDGZLytTFY8C1JQxQPhnatWqNwUXjggo=
 github.com/tidwall/gjson v1.12.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
@@ -917,11 +923,14 @@ github.com/umahmood/haversine v0.0.0-20151105152445-808ab04add26 h1:UFHFmFfixpmf
 github.com/umahmood/haversine v0.0.0-20151105152445-808ab04add26/go.mod h1:IGhd0qMDsUa9acVjsbsT7bu3ktadtGOHI79+idTew/M=
 github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw=
 github.com/vjeantet/grok v1.0.1 h1:2rhIR7J4gThTgcZ1m2JY4TrJZNgjn985U28kT2wQrJ4=
-github.com/vjeantet/grok v1.0.1/go.mod h1:ax1aAchzC6/QMXMcyzHQGZWaW1l195+uMYIkCWPCNIo=
 github.com/vmihailenco/msgpack v4.0.4+incompatible h1:dSLoQfGFAo3F6OoNhwUmLwVgaUXK79GlxNBwueZn0xI=
 github.com/vmihailenco/msgpack v4.0.4+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
 github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
 github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
+github.com/wasilibs/go-re2 v0.1.0 h1:+pcjlvge6E9WWU4eaMlULRikEUkhPDXSFfCF5p7htMI=
+github.com/wasilibs/go-re2 v0.1.0/go.mod h1:F91Yac+zPNDFrrd8fl4mSd7+TTu2tYiX56BEIEPQl2M=
+github.com/wasilibs/go-re2 v0.2.1 h1:bZVXxlqGHXnHlQt5ys6CsHyypsCsP952m1oKBB5O9Dc=
+github.com/wasilibs/go-re2 v0.2.1/go.mod h1:jrB0h+KSTmTwsL0lI2sEyu8TcRi1htLiR/TbSURMwMM=
 github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
 github.com/xdg-go/scram v1.0.2/go.mod h1:1WAq6h33pAW+iRreB34OORO2Nf7qel3VV3fjBj+hCSs=
 github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM=

+ 35 - 12
pkg/exprhelpers/helpers.go

@@ -24,11 +24,15 @@ import (
 
 	"github.com/crowdsecurity/crowdsec/pkg/cache"
 	"github.com/crowdsecurity/crowdsec/pkg/database"
+	"github.com/crowdsecurity/crowdsec/pkg/fflag"
 	"github.com/crowdsecurity/crowdsec/pkg/types"
+
+	"github.com/wasilibs/go-re2"
 )
 
 var dataFile map[string][]string
 var dataFileRegex map[string][]*regexp.Regexp
+var dataFileRe2 map[string][]*re2.Regexp
 
 // This is used to (optionally) cache regexp results for RegexpInFile operations
 var dataFileRegexCache map[string]gcache.Cache = make(map[string]gcache.Cache)
@@ -56,6 +60,7 @@ func GetExprOptions(ctx map[string]interface{}) []expr.Option {
 func Init(databaseClient *database.Client) error {
 	dataFile = make(map[string][]string)
 	dataFileRegex = make(map[string][]*regexp.Regexp)
+	dataFileRe2 = make(map[string][]*re2.Regexp)
 	dbClient = databaseClient
 
 	exprFunctionOptions = []expr.Option{}
@@ -144,7 +149,11 @@ func FileInit(fileFolder string, filename string, fileType string) error {
 		}
 		switch fileType {
 		case "regex", "regexp":
-			dataFileRegex[filename] = append(dataFileRegex[filename], regexp.MustCompile(scanner.Text()))
+			if fflag.Re2RegexpInfileSupport.IsEnabled() {
+				dataFileRe2[filename] = append(dataFileRe2[filename], re2.MustCompile(scanner.Text()))
+			} else {
+				dataFileRegex[filename] = append(dataFileRegex[filename], regexp.MustCompile(scanner.Text()))
+			}
 		case "string":
 			dataFile[filename] = append(dataFile[filename], scanner.Text())
 		default:
@@ -286,6 +295,7 @@ func RegexpInFile(params ...any) (any, error) {
 	filename := params[1].(string)
 	var hash uint64
 	hasCache := false
+	matched := false
 
 	if _, ok := dataFileRegexCache[filename]; ok {
 		hasCache = true
@@ -295,23 +305,36 @@ func RegexpInFile(params ...any) (any, error) {
 		}
 	}
 
-	if _, ok := dataFileRegex[filename]; ok {
-		for _, re := range dataFileRegex[filename] {
-			if re.Match([]byte(data)) {
-				if hasCache {
-					dataFileRegexCache[filename].Set(hash, true)
+	switch fflag.Re2RegexpInfileSupport.IsEnabled() {
+	case true:
+		if _, ok := dataFileRe2[filename]; ok {
+			for _, re := range dataFileRe2[filename] {
+				if re.MatchString(data) {
+					matched = true
+					break
 				}
-				return true, nil
 			}
+		} else {
+			log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
+			log.Errorf("expr library : %s", spew.Sdump(dataFileRe2))
+		}
+	case false:
+		if _, ok := dataFileRegex[filename]; ok {
+			for _, re := range dataFileRegex[filename] {
+				if re.MatchString(data) {
+					matched = true
+					break
+				}
+			}
+		} else {
+			log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
+			log.Errorf("expr library : %s", spew.Sdump(dataFileRegex))
 		}
-	} else {
-		log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
-		log.Errorf("expr library : %s", spew.Sdump(dataFileRegex))
 	}
 	if hasCache {
-		dataFileRegexCache[filename].Set(hash, false)
+		dataFileRegexCache[filename].Set(hash, matched)
 	}
-	return false, nil
+	return matched, nil
 }
 
 // func IpInRange(ip string, ipRange string) bool {

+ 11 - 0
pkg/fflag/crowdsec.go

@@ -6,6 +6,8 @@ var CscliSetup = &Feature{Name: "cscli_setup", Description: "Enable cscli setup
 var DisableHttpRetryBackoff = &Feature{Name: "disable_http_retry_backoff", Description: "Disable http retry backoff"}
 var ChunkedDecisionsStream = &Feature{Name: "chunked_decisions_stream", Description: "Enable chunked decisions stream"}
 var PapiClient = &Feature{Name: "papi_client", Description: "Enable Polling API client"}
+var Re2GrokSupport = &Feature{Name: "re2_grok_support", Description: "Enable RE2 support for GROK patterns"}
+var Re2RegexpInfileSupport = &Feature{Name: "re2_regexp_in_file_support", Description: "Enable RE2 support for RegexpInFile expr helper"}
 
 func RegisterAllFeatures() error {
 	err := Crowdsec.RegisterFeature(CscliSetup)
@@ -24,5 +26,14 @@ func RegisterAllFeatures() error {
 	if err != nil {
 		return err
 	}
+	err = Crowdsec.RegisterFeature(Re2GrokSupport)
+	if err != nil {
+		return err
+	}
+	err = Crowdsec.RegisterFeature(Re2RegexpInfileSupport)
+	if err != nil {
+		return err
+	}
+
 	return nil
 }

+ 2 - 2
pkg/parser/node.go

@@ -509,7 +509,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 		if n.Grok.RunTimeRegexp == nil {
 			return fmt.Errorf("empty grok '%s'", n.Grok.RegexpName)
 		}
-		n.Logger.Tracef("%s regexp: %s", n.Grok.RegexpName, n.Grok.RunTimeRegexp.Regexp.String())
+		n.Logger.Tracef("%s regexp: %s", n.Grok.RegexpName, n.Grok.RunTimeRegexp.String())
 		valid = true
 	} else if n.Grok.RegexpValue != "" {
 		if strings.HasSuffix(n.Grok.RegexpValue, "\n") {
@@ -523,7 +523,7 @@ func (n *Node) compile(pctx *UnixParserCtx, ectx EnricherCtx) error {
 			// We shouldn't be here because compilation succeeded, so regexp shouldn't be nil
 			return fmt.Errorf("grok compilation failure: %s", n.Grok.RegexpValue)
 		}
-		n.Logger.Tracef("%s regexp : %s", n.Grok.RegexpValue, n.Grok.RunTimeRegexp.Regexp.String())
+		n.Logger.Tracef("%s regexp : %s", n.Grok.RegexpValue, n.Grok.RunTimeRegexp.String())
 		valid = true
 	}
 

+ 2 - 2
pkg/parser/parsing_test.go

@@ -372,10 +372,10 @@ func TestGeneratePatternsDoc(t *testing.T) {
 	}
 	log.Infof("-> %s", spew.Sdump(pctx))
 	/*don't judge me, we do it for the users*/
-	p := make(PairList, len(pctx.Grok))
+	p := make(PairList, len(pctx.Grok.Patterns))
 
 	i := 0
-	for key, val := range pctx.Grok {
+	for key, val := range pctx.Grok.Patterns {
 		p[i] = Pair{key, val}
 		p[i].Value = strings.ReplaceAll(p[i].Value, "{%{", "\\{\\%\\{")
 		i++

+ 2 - 0
pkg/parser/unix_parser.go

@@ -9,6 +9,7 @@ import (
 
 	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
 	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
+	"github.com/crowdsecurity/crowdsec/pkg/fflag"
 
 	"github.com/crowdsecurity/grokky"
 	log "github.com/sirupsen/logrus"
@@ -34,6 +35,7 @@ type Parsers struct {
 func Init(c map[string]interface{}) (*UnixParserCtx, error) {
 	r := UnixParserCtx{}
 	r.Grok = grokky.NewBase()
+	r.Grok.UseRe2 = fflag.Re2GrokSupport.IsEnabled()
 	files, err := os.ReadDir(c["patterns"].(string))
 	if err != nil {
 		return nil, err

+ 1 - 1
pkg/types/grok_pattern.go

@@ -34,7 +34,7 @@ type GrokPattern struct {
 	//a proper grok pattern
 	RegexpValue string `yaml:"pattern,omitempty"`
 	//the runtime form of regexpname / regexpvalue
-	RunTimeRegexp *grokky.Pattern `json:"-"` //the actual regexp
+	RunTimeRegexp grokky.Pattern `json:"-"` //the actual regexp
 	//the output of the expression is going to be the source for regexp
 	ExpValue     string      `yaml:"expression,omitempty"`
 	RunTimeValue *vm.Program `json:"-"` //the actual compiled filter