Преглед изворни кода

support for regexps result cache (#2104)

* support for regexps result cache : gcache + xxhash

Co-authored-by: Marco Mariani <marco@crowdsec.net>
Thibault "bui" Koechlin пре 2 година
родитељ
комит
5b0fe4b7f1

+ 10 - 8
cmd/crowdsec/metrics.go

@@ -2,22 +2,22 @@ package main
 
 import (
 	"fmt"
+	"net/http"
 	"time"
 
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+	log "github.com/sirupsen/logrus"
+
 	v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1"
 	"github.com/crowdsecurity/crowdsec/pkg/cache"
 	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
 	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
 	"github.com/crowdsecurity/crowdsec/pkg/database"
+	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
 	leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
 	"github.com/crowdsecurity/crowdsec/pkg/parser"
 	"github.com/crowdsecurity/crowdsec/pkg/types"
-	"github.com/prometheus/client_golang/prometheus"
-	"github.com/prometheus/client_golang/prometheus/promhttp"
-
-	"net/http"
-
-	log "github.com/sirupsen/logrus"
 )
 
 /*prometheus*/
@@ -103,6 +103,8 @@ func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.Ha
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		//update cache metrics (stash)
 		cache.UpdateCacheMetrics()
+		//update cache metrics (regexp)
+		exprhelpers.UpdateRegexpCacheMetrics()
 
 		//decision metrics are only relevant for LAPI
 		if dbClient == nil {
@@ -166,7 +168,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
 			leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow,
 			v1.LapiRouteHits,
 			leaky.BucketsCurrentCount,
-			cache.CacheMetrics)
+			cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
 	} else {
 		log.Infof("Loading prometheus collectors")
 		prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo,
@@ -175,7 +177,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
 			v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime,
 			leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount,
 			globalActiveDecisions, globalAlerts,
-			cache.CacheMetrics)
+			cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
 
 	}
 }

+ 1 - 1
go.mod

@@ -71,6 +71,7 @@ require (
 	github.com/beevik/etree v1.1.0
 	github.com/blackfireio/osinfo v1.0.3
 	github.com/bluele/gcache v0.0.2
+	github.com/cespare/xxhash/v2 v2.1.2
 	github.com/goccy/go-yaml v1.9.7
 	github.com/gofrs/uuid v4.0.0+incompatible
 	github.com/golang-jwt/jwt/v4 v4.2.0
@@ -98,7 +99,6 @@ require (
 	github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
 	github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cespare/xxhash/v2 v2.1.2 // indirect
 	github.com/containerd/containerd v1.6.18 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/docker/distribution v2.8.0+incompatible // indirect

+ 3 - 3
pkg/cache/cache.go

@@ -4,10 +4,10 @@ import (
 	"time"
 
 	"github.com/bluele/gcache"
-	"github.com/crowdsecurity/crowdsec/pkg/types"
 	"github.com/prometheus/client_golang/prometheus"
-	"github.com/sirupsen/logrus"
 	log "github.com/sirupsen/logrus"
+
+	"github.com/crowdsecurity/crowdsec/pkg/types"
 )
 
 var Caches []gcache.Cache
@@ -52,7 +52,7 @@ func CacheInit(cfg CacheCfg) error {
 		cfg.LogLevel = new(log.Level)
 		*cfg.LogLevel = log.InfoLevel
 	}
-	var clog = logrus.New()
+	var clog = log.New()
 	if err := types.ConfigureLogger(clog); err != nil {
 		log.Fatalf("While creating cache logger : %s", err)
 	}

+ 85 - 2
pkg/exprhelpers/exprlib.go

@@ -12,16 +12,33 @@ import (
 	"strings"
 	"time"
 
+	"github.com/bluele/gcache"
 	"github.com/c-robinson/iplib"
+	"github.com/cespare/xxhash/v2"
+	"github.com/davecgh/go-spew/spew"
+	"github.com/prometheus/client_golang/prometheus"
+	log "github.com/sirupsen/logrus"
 
 	"github.com/crowdsecurity/crowdsec/pkg/cache"
 	"github.com/crowdsecurity/crowdsec/pkg/database"
-	"github.com/davecgh/go-spew/spew"
-	log "github.com/sirupsen/logrus"
+	"github.com/crowdsecurity/crowdsec/pkg/types"
 )
 
 var dataFile map[string][]string
 var dataFileRegex map[string][]*regexp.Regexp
+
+// This is used to (optionally) cache regexp results for RegexpInFile operations
+var dataFileRegexCache map[string]gcache.Cache = make(map[string]gcache.Cache)
+
+/*prometheus*/
+var RegexpCacheMetrics = prometheus.NewGaugeVec(
+	prometheus.GaugeOpts{
+		Name: "cs_regexp_cache_size",
+		Help: "Entries per regexp cache.",
+	},
+	[]string{"name"},
+)
+
 var dbClient *database.Client
 
 func Get(arr []string, index int) string {
@@ -116,6 +133,54 @@ func Init(databaseClient *database.Client) error {
 	return nil
 }
 
+func RegexpCacheInit(filename string, CacheCfg types.DataSource) error {
+
+	//cache is explicitly disabled
+	if CacheCfg.Cache != nil && !*CacheCfg.Cache {
+		return nil
+	}
+	//cache is implicitly disabled if no cache config is provided
+	if CacheCfg.Strategy == nil && CacheCfg.TTL == nil && CacheCfg.Size == nil {
+		return nil
+	}
+	//cache is enabled
+
+	if CacheCfg.Size == nil {
+		CacheCfg.Size = types.IntPtr(50)
+	}
+
+	gc := gcache.New(*CacheCfg.Size)
+
+	if CacheCfg.Strategy == nil {
+		CacheCfg.Strategy = types.StrPtr("LRU")
+	}
+	switch *CacheCfg.Strategy {
+	case "LRU":
+		gc = gc.LRU()
+	case "LFU":
+		gc = gc.LFU()
+	case "ARC":
+		gc = gc.ARC()
+	default:
+		return fmt.Errorf("unknown cache strategy '%s'", *CacheCfg.Strategy)
+	}
+
+	if CacheCfg.TTL != nil {
+		gc.Expiration(*CacheCfg.TTL)
+	}
+	cache := gc.Build()
+	dataFileRegexCache[filename] = cache
+	return nil
+}
+
+// UpdateCacheMetrics is called directly by the prom handler
+func UpdateRegexpCacheMetrics() {
+	RegexpCacheMetrics.Reset()
+	for name := range dataFileRegexCache {
+		RegexpCacheMetrics.With(prometheus.Labels{"name": name}).Set(float64(dataFileRegexCache[name].Len(true)))
+	}
+}
+
 func FileInit(fileFolder string, filename string, fileType string) error {
 	log.Debugf("init (folder:%s) (file:%s) (type:%s)", fileFolder, filename, fileType)
 	filepath := path.Join(fileFolder, filename)
@@ -192,9 +257,24 @@ func File(filename string) []string {
 }
 
 func RegexpInFile(data string, filename string) bool {
+
+	var hash uint64
+	hasCache := false
+
+	if _, ok := dataFileRegexCache[filename]; ok {
+		hasCache = true
+		hash = xxhash.Sum64String(data)
+		if val, err := dataFileRegexCache[filename].Get(hash); err == nil {
+			return val.(bool)
+		}
+	}
+
 	if _, ok := dataFileRegex[filename]; ok {
 		for _, re := range dataFileRegex[filename] {
 			if re.Match([]byte(data)) {
+				if hasCache {
+					dataFileRegexCache[filename].Set(hash, true)
+				}
 				return true
 			}
 		}
@@ -202,6 +282,9 @@ func RegexpInFile(data string, filename string) bool {
 		log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
 		log.Errorf("expr library : %s", spew.Sdump(dataFileRegex))
 	}
+	if hasCache {
+		dataFileRegexCache[filename].Set(hash, false)
+	}
 	return false
 }
 

+ 50 - 22
pkg/exprhelpers/exprlib_test.go

@@ -4,22 +4,20 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"testing"
 	"time"
 
+	"github.com/antonmedv/expr"
 	"github.com/pkg/errors"
+	log "github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
 	"github.com/crowdsecurity/crowdsec/pkg/cstest"
 	"github.com/crowdsecurity/crowdsec/pkg/database"
 	"github.com/crowdsecurity/crowdsec/pkg/models"
 	"github.com/crowdsecurity/crowdsec/pkg/types"
-	log "github.com/sirupsen/logrus"
-
-	"testing"
-
-	"github.com/antonmedv/expr"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
 )
 
 var (
@@ -29,24 +27,21 @@ var (
 func getDBClient(t *testing.T) *database.Client {
 	t.Helper()
 	dbPath, err := os.CreateTemp("", "*sqlite")
-	if err != nil {
-		t.Fatal(err)
-	}
+	require.NoError(t, err)
+
 	testDbClient, err := database.NewClient(&csconfig.DatabaseCfg{
 		Type:   "sqlite",
 		DbName: "crowdsec",
 		DbPath: dbPath.Name(),
 	})
-	if err != nil {
-		t.Fatal(err)
-	}
+	require.NoError(t, err)
+
 	return testDbClient
 }
 
 func TestVisitor(t *testing.T) {
-	if err := Init(nil); err != nil {
-		log.Fatal(err)
-	}
+	err := Init(nil)
+	require.NoError(t, err)
 
 	tests := []struct {
 		name   string
@@ -130,6 +125,39 @@ func TestVisitor(t *testing.T) {
 	}
 }
 
+func TestRegexpCacheBehavior(t *testing.T) {
+	err := Init(nil)
+	require.NoError(t, err)
+
+	filename := "test_data_re.txt"
+	err = FileInit(TestFolder, filename, "regex")
+	require.NoError(t, err)
+
+	//cache with no TTL
+	err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(1)})
+	require.NoError(t, err)
+
+	ret := RegexpInFile("crowdsec", filename)
+	assert.False(t, ret)
+	assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
+
+	ret = RegexpInFile("Crowdsec", filename)
+	assert.True(t, ret)
+	assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
+
+	//cache with TTL
+	ttl := 500 * time.Millisecond
+	err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(2), TTL: &ttl})
+	require.NoError(t, err)
+
+	ret = RegexpInFile("crowdsec", filename)
+	assert.False(t, ret)
+	assert.Equal(t, 1, dataFileRegexCache[filename].Len(true))
+
+	time.Sleep(1 * time.Second)
+	assert.Equal(t, 0, dataFileRegexCache[filename].Len(true))
+}
+
 func TestRegexpInFile(t *testing.T) {
 	if err := Init(nil); err != nil {
 		log.Fatal(err)
@@ -449,7 +477,7 @@ func TestAtof(t *testing.T) {
 	expectedFloat := 1.5
 
 	if Atof(testFloat) != expectedFloat {
-		t.Fatalf("Atof should returned 1.5 as a float")
+		t.Fatalf("Atof should return 1.5 as a float")
 	}
 
 	log.Printf("test 'Atof()' : OK")
@@ -459,7 +487,7 @@ func TestAtof(t *testing.T) {
 	expectedFloat = 0.0
 
 	if Atof(testFloat) != expectedFloat {
-		t.Fatalf("Atof should returned a negative value (error) as a float got")
+		t.Fatalf("Atof should return a negative value (error) as a float got")
 	}
 
 	log.Printf("test 'Atof()' : OK")
@@ -470,7 +498,7 @@ func TestUpper(t *testing.T) {
 	expectedStr := "TEST"
 
 	if Upper(testStr) != expectedStr {
-		t.Fatalf("Upper() should returned test in upper case")
+		t.Fatalf("Upper() should return test in upper case")
 	}
 
 	log.Printf("test 'Upper()' : OK")
@@ -503,7 +531,7 @@ func TestParseUri(t *testing.T) {
 				"ParseUri": ParseUri,
 			},
 			code:   "ParseUri(uri)",
-			result: map[string][]string{"a": []string{"1"}, "b": []string{"2"}},
+			result: map[string][]string{"a": {"1"}, "b": {"2"}},
 			err:    "",
 		},
 		{
@@ -523,7 +551,7 @@ func TestParseUri(t *testing.T) {
 				"ParseUri": ParseUri,
 			},
 			code:   "ParseUri(uri)",
-			result: map[string][]string{"a": []string{"1"}, "b": []string{"2?"}},
+			result: map[string][]string{"a": {"1"}, "b": {"2?"}},
 			err:    "",
 		},
 		{
@@ -533,7 +561,7 @@ func TestParseUri(t *testing.T) {
 				"ParseUri": ParseUri,
 			},
 			code:   "ParseUri(uri)",
-			result: map[string][]string{"?": []string{"", "123"}},
+			result: map[string][]string{"?": {"", "123"}},
 			err:    "",
 		},
 		{

+ 11 - 12
pkg/leakybucket/manager_load.go

@@ -11,24 +11,20 @@ import (
 	"sync"
 	"time"
 
-	"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
-
-	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
-	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
-	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
-	"github.com/crowdsecurity/crowdsec/pkg/types"
-
-	"github.com/davecgh/go-spew/spew"
-	"github.com/sirupsen/logrus"
-	log "github.com/sirupsen/logrus"
-
 	"github.com/antonmedv/expr"
 	"github.com/antonmedv/expr/vm"
+	"github.com/davecgh/go-spew/spew"
 	"github.com/goombaio/namegenerator"
+	log "github.com/sirupsen/logrus"
 	"gopkg.in/tomb.v2"
 	yaml "gopkg.in/yaml.v2"
 
+	"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
+	"github.com/crowdsecurity/crowdsec/pkg/csconfig"
+	"github.com/crowdsecurity/crowdsec/pkg/cwhub"
+	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
 	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
+	"github.com/crowdsecurity/crowdsec/pkg/types"
 )
 
 // BucketFactory struct holds all fields for any bucket configuration. This is to have a
@@ -254,7 +250,7 @@ func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, files []string, tomb *tomb.
 func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
 	var err error
 	if bucketFactory.Debug {
-		var clog = logrus.New()
+		var clog = log.New()
 		if err := types.ConfigureLogger(clog); err != nil {
 			log.Fatalf("While creating bucket-specific logger : %s", err)
 		}
@@ -374,6 +370,9 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
 			if err != nil {
 				bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err)
 			}
+			if data.Type == "regexp" { //cache only makes sense for regexp
+				exprhelpers.RegexpCacheInit(data.DestPath, *data)
+			}
 		}
 	}
 

+ 7 - 5
pkg/parser/stage.go

@@ -16,13 +16,12 @@ import (
 	"strings"
 	"time"
 
-	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
-	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
-
-	log "github.com/sirupsen/logrus"
-
 	"github.com/goombaio/namegenerator"
+	log "github.com/sirupsen/logrus"
 	yaml "gopkg.in/yaml.v2"
+
+	"github.com/crowdsecurity/crowdsec/pkg/cwversion"
+	"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
 )
 
 var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano())
@@ -116,6 +115,9 @@ func LoadStages(stageFiles []Stagefile, pctx *UnixParserCtx, ectx EnricherCtx) (
 					if err != nil {
 						log.Error(err)
 					}
+					if data.Type == "regexp" { //cache only makes sense for regexp
+						exprhelpers.RegexpCacheInit(data.DestPath, *data)
+					}
 				}
 			}
 			nodes = append(nodes, node)

+ 6 - 0
pkg/types/dataset.go

@@ -6,6 +6,7 @@ import (
 	"net/http"
 	"os"
 	"path"
+	"time"
 
 	log "github.com/sirupsen/logrus"
 )
@@ -14,6 +15,11 @@ type DataSource struct {
 	SourceURL string `yaml:"source_url"`
 	DestPath  string `yaml:"dest_file"`
 	Type      string `yaml:"type"`
+	//Control cache strategy on expensive regexps
+	Cache    *bool          `yaml:"cache"`
+	Strategy *string        `yaml:"strategy"`
+	Size     *int           `yaml:"size"`
+	TTL      *time.Duration `yaml:"ttl"`
 }
 
 type DataSet struct {