support for regexps result cache (#2104)
* support for regexps result cache : gcache + xxhash Co-authored-by: Marco Mariani <marco@crowdsec.net>
This commit is contained in:
parent
e71d146a2d
commit
5b0fe4b7f1
8 changed files with 173 additions and 53 deletions
|
@ -2,22 +2,22 @@ package main
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
v1 "github.com/crowdsecurity/crowdsec/pkg/apiserver/controllers/v1"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cache"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/database"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
|
||||
leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/parser"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
|
||||
"net/http"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
/*prometheus*/
|
||||
|
@ -103,6 +103,8 @@ func computeDynamicMetrics(next http.Handler, dbClient *database.Client) http.Ha
|
|||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
//update cache metrics (stash)
|
||||
cache.UpdateCacheMetrics()
|
||||
//update cache metrics (regexp)
|
||||
exprhelpers.UpdateRegexpCacheMetrics()
|
||||
|
||||
//decision metrics are only relevant for LAPI
|
||||
if dbClient == nil {
|
||||
|
@ -166,7 +168,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
|
|||
leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow,
|
||||
v1.LapiRouteHits,
|
||||
leaky.BucketsCurrentCount,
|
||||
cache.CacheMetrics)
|
||||
cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
|
||||
} else {
|
||||
log.Infof("Loading prometheus collectors")
|
||||
prometheus.MustRegister(globalParserHits, globalParserHitsOk, globalParserHitsKo,
|
||||
|
@ -175,7 +177,7 @@ func registerPrometheus(config *csconfig.PrometheusCfg) {
|
|||
v1.LapiRouteHits, v1.LapiMachineHits, v1.LapiBouncerHits, v1.LapiNilDecisions, v1.LapiNonNilDecisions, v1.LapiResponseTime,
|
||||
leaky.BucketsPour, leaky.BucketsUnderflow, leaky.BucketsCanceled, leaky.BucketsInstantiation, leaky.BucketsOverflow, leaky.BucketsCurrentCount,
|
||||
globalActiveDecisions, globalAlerts,
|
||||
cache.CacheMetrics)
|
||||
cache.CacheMetrics, exprhelpers.RegexpCacheMetrics)
|
||||
|
||||
}
|
||||
}
|
||||
|
|
2
go.mod
2
go.mod
|
@ -71,6 +71,7 @@ require (
|
|||
github.com/beevik/etree v1.1.0
|
||||
github.com/blackfireio/osinfo v1.0.3
|
||||
github.com/bluele/gcache v0.0.2
|
||||
github.com/cespare/xxhash/v2 v2.1.2
|
||||
github.com/goccy/go-yaml v1.9.7
|
||||
github.com/gofrs/uuid v4.0.0+incompatible
|
||||
github.com/golang-jwt/jwt/v4 v4.2.0
|
||||
|
@ -98,7 +99,6 @@ require (
|
|||
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
|
||||
github.com/asaskevich/govalidator v0.0.0-20200907205600-7a23bdc65eef // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||
github.com/containerd/containerd v1.6.18 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/docker/distribution v2.8.0+incompatible // indirect
|
||||
|
|
6
pkg/cache/cache.go
vendored
6
pkg/cache/cache.go
vendored
|
@ -4,10 +4,10 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/bluele/gcache"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/sirupsen/logrus"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
)
|
||||
|
||||
var Caches []gcache.Cache
|
||||
|
@ -52,7 +52,7 @@ func CacheInit(cfg CacheCfg) error {
|
|||
cfg.LogLevel = new(log.Level)
|
||||
*cfg.LogLevel = log.InfoLevel
|
||||
}
|
||||
var clog = logrus.New()
|
||||
var clog = log.New()
|
||||
if err := types.ConfigureLogger(clog); err != nil {
|
||||
log.Fatalf("While creating cache logger : %s", err)
|
||||
}
|
||||
|
|
|
@ -12,16 +12,33 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/bluele/gcache"
|
||||
"github.com/c-robinson/iplib"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cache"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/database"
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
)
|
||||
|
||||
var dataFile map[string][]string
|
||||
var dataFileRegex map[string][]*regexp.Regexp
|
||||
|
||||
// This is used to (optionally) cache regexp results for RegexpInFile operations
|
||||
var dataFileRegexCache map[string]gcache.Cache = make(map[string]gcache.Cache)
|
||||
|
||||
/*prometheus*/
|
||||
var RegexpCacheMetrics = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "cs_regexp_cache_size",
|
||||
Help: "Entries per regexp cache.",
|
||||
},
|
||||
[]string{"name"},
|
||||
)
|
||||
|
||||
var dbClient *database.Client
|
||||
|
||||
func Get(arr []string, index int) string {
|
||||
|
@ -116,6 +133,54 @@ func Init(databaseClient *database.Client) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func RegexpCacheInit(filename string, CacheCfg types.DataSource) error {
|
||||
|
||||
//cache is explicitly disabled
|
||||
if CacheCfg.Cache != nil && !*CacheCfg.Cache {
|
||||
return nil
|
||||
}
|
||||
//cache is implicitly disabled if no cache config is provided
|
||||
if CacheCfg.Strategy == nil && CacheCfg.TTL == nil && CacheCfg.Size == nil {
|
||||
return nil
|
||||
}
|
||||
//cache is enabled
|
||||
|
||||
if CacheCfg.Size == nil {
|
||||
CacheCfg.Size = types.IntPtr(50)
|
||||
}
|
||||
|
||||
gc := gcache.New(*CacheCfg.Size)
|
||||
|
||||
if CacheCfg.Strategy == nil {
|
||||
CacheCfg.Strategy = types.StrPtr("LRU")
|
||||
}
|
||||
switch *CacheCfg.Strategy {
|
||||
case "LRU":
|
||||
gc = gc.LRU()
|
||||
case "LFU":
|
||||
gc = gc.LFU()
|
||||
case "ARC":
|
||||
gc = gc.ARC()
|
||||
default:
|
||||
return fmt.Errorf("unknown cache strategy '%s'", *CacheCfg.Strategy)
|
||||
}
|
||||
|
||||
if CacheCfg.TTL != nil {
|
||||
gc.Expiration(*CacheCfg.TTL)
|
||||
}
|
||||
cache := gc.Build()
|
||||
dataFileRegexCache[filename] = cache
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateCacheMetrics is called directly by the prom handler
|
||||
func UpdateRegexpCacheMetrics() {
|
||||
RegexpCacheMetrics.Reset()
|
||||
for name := range dataFileRegexCache {
|
||||
RegexpCacheMetrics.With(prometheus.Labels{"name": name}).Set(float64(dataFileRegexCache[name].Len(true)))
|
||||
}
|
||||
}
|
||||
|
||||
func FileInit(fileFolder string, filename string, fileType string) error {
|
||||
log.Debugf("init (folder:%s) (file:%s) (type:%s)", fileFolder, filename, fileType)
|
||||
filepath := path.Join(fileFolder, filename)
|
||||
|
@ -192,9 +257,24 @@ func File(filename string) []string {
|
|||
}
|
||||
|
||||
func RegexpInFile(data string, filename string) bool {
|
||||
|
||||
var hash uint64
|
||||
hasCache := false
|
||||
|
||||
if _, ok := dataFileRegexCache[filename]; ok {
|
||||
hasCache = true
|
||||
hash = xxhash.Sum64String(data)
|
||||
if val, err := dataFileRegexCache[filename].Get(hash); err == nil {
|
||||
return val.(bool)
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := dataFileRegex[filename]; ok {
|
||||
for _, re := range dataFileRegex[filename] {
|
||||
if re.Match([]byte(data)) {
|
||||
if hasCache {
|
||||
dataFileRegexCache[filename].Set(hash, true)
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
@ -202,6 +282,9 @@ func RegexpInFile(data string, filename string) bool {
|
|||
log.Errorf("file '%s' (type:regexp) not found in expr library", filename)
|
||||
log.Errorf("expr library : %s", spew.Sdump(dataFileRegex))
|
||||
}
|
||||
if hasCache {
|
||||
dataFileRegexCache[filename].Set(hash, false)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
|
|
|
@ -4,22 +4,20 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/antonmedv/expr"
|
||||
"github.com/pkg/errors"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cstest"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/database"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/models"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"testing"
|
||||
|
||||
"github.com/antonmedv/expr"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -29,24 +27,21 @@ var (
|
|||
func getDBClient(t *testing.T) *database.Client {
|
||||
t.Helper()
|
||||
dbPath, err := os.CreateTemp("", "*sqlite")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
testDbClient, err := database.NewClient(&csconfig.DatabaseCfg{
|
||||
Type: "sqlite",
|
||||
DbName: "crowdsec",
|
||||
DbPath: dbPath.Name(),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
return testDbClient
|
||||
}
|
||||
|
||||
func TestVisitor(t *testing.T) {
|
||||
if err := Init(nil); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
err := Init(nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
|
@ -130,6 +125,39 @@ func TestVisitor(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestRegexpCacheBehavior(t *testing.T) {
|
||||
err := Init(nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
filename := "test_data_re.txt"
|
||||
err = FileInit(TestFolder, filename, "regex")
|
||||
require.NoError(t, err)
|
||||
|
||||
//cache with no TTL
|
||||
err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(1)})
|
||||
require.NoError(t, err)
|
||||
|
||||
ret := RegexpInFile("crowdsec", filename)
|
||||
assert.False(t, ret)
|
||||
assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
|
||||
|
||||
ret = RegexpInFile("Crowdsec", filename)
|
||||
assert.True(t, ret)
|
||||
assert.Equal(t, 1, dataFileRegexCache[filename].Len(false))
|
||||
|
||||
//cache with TTL
|
||||
ttl := 500 * time.Millisecond
|
||||
err = RegexpCacheInit(filename, types.DataSource{Type: "regex", Size: types.IntPtr(2), TTL: &ttl})
|
||||
require.NoError(t, err)
|
||||
|
||||
ret = RegexpInFile("crowdsec", filename)
|
||||
assert.False(t, ret)
|
||||
assert.Equal(t, 1, dataFileRegexCache[filename].Len(true))
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
assert.Equal(t, 0, dataFileRegexCache[filename].Len(true))
|
||||
}
|
||||
|
||||
func TestRegexpInFile(t *testing.T) {
|
||||
if err := Init(nil); err != nil {
|
||||
log.Fatal(err)
|
||||
|
@ -449,7 +477,7 @@ func TestAtof(t *testing.T) {
|
|||
expectedFloat := 1.5
|
||||
|
||||
if Atof(testFloat) != expectedFloat {
|
||||
t.Fatalf("Atof should returned 1.5 as a float")
|
||||
t.Fatalf("Atof should return 1.5 as a float")
|
||||
}
|
||||
|
||||
log.Printf("test 'Atof()' : OK")
|
||||
|
@ -459,7 +487,7 @@ func TestAtof(t *testing.T) {
|
|||
expectedFloat = 0.0
|
||||
|
||||
if Atof(testFloat) != expectedFloat {
|
||||
t.Fatalf("Atof should returned a negative value (error) as a float got")
|
||||
t.Fatalf("Atof should return a negative value (error) as a float got")
|
||||
}
|
||||
|
||||
log.Printf("test 'Atof()' : OK")
|
||||
|
@ -470,7 +498,7 @@ func TestUpper(t *testing.T) {
|
|||
expectedStr := "TEST"
|
||||
|
||||
if Upper(testStr) != expectedStr {
|
||||
t.Fatalf("Upper() should returned test in upper case")
|
||||
t.Fatalf("Upper() should return test in upper case")
|
||||
}
|
||||
|
||||
log.Printf("test 'Upper()' : OK")
|
||||
|
@ -503,7 +531,7 @@ func TestParseUri(t *testing.T) {
|
|||
"ParseUri": ParseUri,
|
||||
},
|
||||
code: "ParseUri(uri)",
|
||||
result: map[string][]string{"a": []string{"1"}, "b": []string{"2"}},
|
||||
result: map[string][]string{"a": {"1"}, "b": {"2"}},
|
||||
err: "",
|
||||
},
|
||||
{
|
||||
|
@ -523,7 +551,7 @@ func TestParseUri(t *testing.T) {
|
|||
"ParseUri": ParseUri,
|
||||
},
|
||||
code: "ParseUri(uri)",
|
||||
result: map[string][]string{"a": []string{"1"}, "b": []string{"2?"}},
|
||||
result: map[string][]string{"a": {"1"}, "b": {"2?"}},
|
||||
err: "",
|
||||
},
|
||||
{
|
||||
|
@ -533,7 +561,7 @@ func TestParseUri(t *testing.T) {
|
|||
"ParseUri": ParseUri,
|
||||
},
|
||||
code: "ParseUri(uri)",
|
||||
result: map[string][]string{"?": []string{"", "123"}},
|
||||
result: map[string][]string{"?": {"", "123"}},
|
||||
err: "",
|
||||
},
|
||||
{
|
||||
|
|
|
@ -11,24 +11,20 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
"github.com/sirupsen/logrus"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/antonmedv/expr"
|
||||
"github.com/antonmedv/expr/vm"
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
"github.com/goombaio/namegenerator"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/tomb.v2"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/alertcontext"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
)
|
||||
|
||||
// BucketFactory struct holds all fields for any bucket configuration. This is to have a
|
||||
|
@ -254,7 +250,7 @@ func LoadBuckets(cscfg *csconfig.CrowdsecServiceCfg, files []string, tomb *tomb.
|
|||
func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
|
||||
var err error
|
||||
if bucketFactory.Debug {
|
||||
var clog = logrus.New()
|
||||
var clog = log.New()
|
||||
if err := types.ConfigureLogger(clog); err != nil {
|
||||
log.Fatalf("While creating bucket-specific logger : %s", err)
|
||||
}
|
||||
|
@ -374,6 +370,9 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
|
|||
if err != nil {
|
||||
bucketFactory.logger.Errorf("unable to init data for file '%s': %s", data.DestPath, err)
|
||||
}
|
||||
if data.Type == "regexp" { //cache only makes sense for regexp
|
||||
exprhelpers.RegexpCacheInit(data.DestPath, *data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,13 +16,12 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/goombaio/namegenerator"
|
||||
log "github.com/sirupsen/logrus"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/cwversion"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/goombaio/namegenerator"
|
||||
yaml "gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var seed namegenerator.Generator = namegenerator.NewNameGenerator(time.Now().UTC().UnixNano())
|
||||
|
@ -116,6 +115,9 @@ func LoadStages(stageFiles []Stagefile, pctx *UnixParserCtx, ectx EnricherCtx) (
|
|||
if err != nil {
|
||||
log.Error(err)
|
||||
}
|
||||
if data.Type == "regexp" { //cache only makes sense for regexp
|
||||
exprhelpers.RegexpCacheInit(data.DestPath, *data)
|
||||
}
|
||||
}
|
||||
}
|
||||
nodes = append(nodes, node)
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
@ -14,6 +15,11 @@ type DataSource struct {
|
|||
SourceURL string `yaml:"source_url"`
|
||||
DestPath string `yaml:"dest_file"`
|
||||
Type string `yaml:"type"`
|
||||
//Control cache strategy on expensive regexps
|
||||
Cache *bool `yaml:"cache"`
|
||||
Strategy *string `yaml:"strategy"`
|
||||
Size *int `yaml:"size"`
|
||||
TTL *time.Duration `yaml:"ttl"`
|
||||
}
|
||||
|
||||
type DataSet struct {
|
||||
|
|
Loading…
Reference in a new issue