Add bayesian bucket type (#2290)

This commit is contained in:
Emanuel Seemann 2023-06-21 15:08:27 +02:00 committed by GitHub
parent da6106bd23
commit 40e6b205bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 471 additions and 0 deletions

View file

@ -48,6 +48,15 @@ infinite leakspeed (it never overflows, nor leaks). Nevertheless,
the event is raised after a fixed duration. The option is called
duration.
## Bayesian
A Bayesian bucket is a special bucket that runs Bayesian inference instead of
counting events. Each condition must have its likelihoods specified in the
yaml file under `prob_given_benign` and `prob_given_evil`. The bucket
will continue evaluating events until the posterior goes above the
threshold (triggering the overflow) or the duration (specified by leakspeed)
expires.
## Available configuration options for buckets
### Fields for standard buckets
@ -102,6 +111,22 @@ Capacity and leakspeed are not relevant for this kind of bucket.
Nevertheless, this kind of bucket is often used with an infinite
leakspeed and an infinite capacity [capacity set to -1 for now].
#### Bayesian
* bayesian_prior: The prior to start with
* bayesian_threshold: The threshold for the posterior to trigger the overflow.
* bayesian_conditions: List of Bayesian conditions with likelihoods
Bayesian Conditions are built from:
* condition: The expr for this specific condition to be true
* prob_given_evil: The likelihood an IP satisfies the condition given the fact
that it is a malicious IP
* prob_given_benign: The likelihood an IP satisfies the condition given the fact
that it is a benign IP
* guillotine: Bool to stop the condition from getting evaluated if it has
evaluated to true once. This should be used if evaluating the condition is
computationally expensive.
## Add examples here

163
pkg/leakybucket/bayesian.go Normal file
View file

@ -0,0 +1,163 @@
package leakybucket
import (
"fmt"
"github.com/antonmedv/expr"
"github.com/antonmedv/expr/vm"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
"github.com/crowdsecurity/crowdsec/pkg/types"
)
// RawBayesianCondition is one entry of the `bayesian_conditions` list as it is
// written in the bucket's yaml configuration.
type RawBayesianCondition struct {
// ConditionalFilterName holds the source text of the expr expression that is
// compiled and evaluated for this condition (yaml key: condition).
ConditionalFilterName string `yaml:"condition"`
// ProbGivenEvil is the likelihood used in the Bayes update when the condition
// evaluates to true, under the "evil" hypothesis.
ProbGivenEvil float32 `yaml:"prob_given_evil"`
// ProbGivenBenign is the likelihood used in the Bayes update when the
// condition evaluates to true, under the "benign" hypothesis.
ProbGivenBenign float32 `yaml:"prob_given_benign"`
// Guillotine, when set, stops the expression from being re-evaluated once it
// has evaluated to true; later updates treat the condition as true.
Guillotine bool `yaml:"guillotine,omitempty"`
}
// BayesianEvent pairs a configured condition with its compiled expression and
// the runtime state needed to evaluate it.
type BayesianEvent struct {
// rawCondition is the condition exactly as read from the configuration.
rawCondition RawBayesianCondition
// conditionalFilterRuntime is the compiled form of rawCondition's expression;
// bayesianUpdate skips the condition while it is nil.
conditionalFilterRuntime *vm.Program
// guillotineState is set to true the first time the condition evaluates to
// true. It is only reported by getGuillotineState when rawCondition.Guillotine
// is enabled.
guillotineState bool
}
// BayesianBucket is the bucket processor that runs bayesian inference over the
// configured conditions each time an event is poured.
type BayesianBucket struct {
// bayesianEventArray holds one compiled event per configured condition, in
// configuration order.
bayesianEventArray []*BayesianEvent
// prior is the starting probability, copied from the factory's BayesianPrior.
prior float32
// threshold is the posterior value above which the bucket overflows, copied
// from the factory's BayesianThreshold.
threshold float32
// posterior is the running probability; it is reset to prior at the start of
// every pour and then updated once per condition.
posterior float32
DumbProcessor
}
// updateProbability applies one step of Bayes' rule and returns the posterior
// probability of the "evil" hypothesis.
//
// prior is the probability of "evil" before the observation. probGivenEvil and
// probGivenBenign are the likelihoods of the observation under the "evil" and
// "benign" hypotheses respectively.
//
// When the observation has zero total probability (the denominator is 0) the
// quotient is undefined. In that case prior is returned unchanged, so that a
// NaN cannot leak into every later update and threshold comparison.
func updateProbability(prior, probGivenEvil, probGivenBenign float32) float32 {
	numerator := probGivenEvil * prior
	denominator := numerator + probGivenBenign*(1-prior)
	if denominator == 0 {
		// The observation carries no usable evidence; keep the current belief.
		return prior
	}
	return numerator / denominator
}
// OnBucketInit compiles every bayesian condition declared on the factory and
// copies the configured prior and threshold onto the processor.
//
// Locking: conditionalExprCacheLock is acquired before the shared expression
// cache is inspected or created. compileCondition is entered with the lock
// held and hands it back held on success; when compilation fails it has
// already released the lock, which is why the error path below returns
// without unlocking.
//
// It returns the first compilation error encountered, or nil.
func (c *BayesianBucket) OnBucketInit(g *BucketFactory) error {
	events := make([]*BayesianEvent, len(g.BayesianConditions))

	conditionalExprCacheLock.Lock()
	// Create the cache lazily, but only while holding the lock so that the
	// nil check and the assignment cannot race with another initialisation.
	if conditionalExprCache == nil {
		conditionalExprCache = make(map[string]vm.Program)
	}

	for index, bcond := range g.BayesianConditions {
		event := &BayesianEvent{rawCondition: bcond}
		if err := event.compileCondition(); err != nil {
			// compileCondition dropped the lock before compiling and does not
			// re-acquire it on failure, so there is nothing to unlock here.
			return err
		}
		events[index] = event
	}
	conditionalExprCacheLock.Unlock()

	c.bayesianEventArray = events
	c.prior = g.BayesianPrior
	c.threshold = g.BayesianThreshold

	return nil
}
// AfterBucketPour returns the hook executed after an event has been poured
// into the bucket.
//
// The hook restarts inference from the configured prior, applies every
// condition in order (a failing condition is logged and skipped), and then
// compares the resulting posterior with the threshold. Above the threshold the
// bucket overflows: the overflow timestamp is set, the queue is sent on the
// bucket's Out channel and nil is returned. Otherwise the event is returned.
func (c *BayesianBucket) AfterBucketPour(b *BucketFactory) func(types.Event, *Leaky) *types.Event {
	pour := func(msg types.Event, l *Leaky) *types.Event {
		c.posterior = c.prior
		l.logger.Debugf("starting bayesian evaluation with prior: %v", c.posterior)

		for _, condition := range c.bayesianEventArray {
			if err := condition.bayesianUpdate(c, msg, l); err != nil {
				l.logger.Errorf("bayesian update failed for %s with %s", condition.rawCondition.ConditionalFilterName, err)
			}
		}

		l.logger.Debugf("value of posterior after events : %v", c.posterior)

		// Written as a negated ">" on purpose: a NaN posterior must take the
		// "no overflow" path, exactly as a plain "posterior > threshold" test.
		if !(c.posterior > c.threshold) {
			return &msg
		}

		l.logger.Debugf("Bayesian bucket overflow")
		l.Ovflw_ts = l.Last_ts
		l.Out <- l.Queue

		return nil
	}

	return pour
}
// bayesianUpdate evaluates this condition against the current event and folds
// the outcome into the bucket's posterior.
//
//   - A condition without a compiled expression is skipped.
//   - If the guillotine has already been triggered, the expression is not run
//     again and the posterior is updated as if the condition were true.
//   - Otherwise the expression is run with "evt", "queue" and "leaky" in its
//     environment. A true result updates the posterior with the configured
//     likelihoods and triggers the guillotine; a false result updates it with
//     the complementary likelihoods.
//
// An error is returned when the expression fails to run or does not return a
// bool; the posterior is left untouched in that case.
func (b *BayesianEvent) bayesianUpdate(c *BayesianBucket, msg types.Event, l *Leaky) error {
	name := b.rawCondition.ConditionalFilterName

	if b.conditionalFilterRuntime == nil {
		l.logger.Tracef("empty conditional filter runtime for %s", name)
		return nil
	}

	guillotined := b.getGuillotineState()
	l.logger.Tracef("guillotine value for %s : %v", name, guillotined)

	if guillotined {
		l.logger.Tracef("guillotine already triggered for %s", name)
		l.logger.Tracef("condition true updating prior for: %s", name)
		c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign)
		l.logger.Tracef("new value of posterior : %v", c.posterior)

		return nil
	}

	l.logger.Debugf("running condition expression: %s", name)

	ret, err := expr.Run(b.conditionalFilterRuntime, map[string]interface{}{"evt": &msg, "queue": l.Queue, "leaky": l})
	if err != nil {
		// %w keeps the underlying error available to errors.Is / errors.As.
		return fmt.Errorf("unable to run conditional filter: %w", err)
	}

	l.logger.Tracef("bayesian bucket expression %s returned : %v", name, ret)

	condition, ok := ret.(bool)
	if !ok {
		return fmt.Errorf("bayesian condition unexpected non-bool return: %T", ret)
	}

	// %t prints the boolean outcome; %T would only ever print the type "bool".
	l.logger.Tracef("condition %t updating prior for: %s", condition, name)

	if condition {
		c.posterior = updateProbability(c.posterior, b.rawCondition.ProbGivenEvil, b.rawCondition.ProbGivenBenign)
		b.triggerGuillotine()
	} else {
		// The condition did not hold: use the complementary likelihoods.
		c.posterior = updateProbability(c.posterior, 1-b.rawCondition.ProbGivenEvil, 1-b.rawCondition.ProbGivenBenign)
	}

	l.logger.Tracef("new value of posterior: %v", c.posterior)

	return nil
}
// getGuillotineState reports whether the guillotine has been triggered for
// this condition. It is always false when the condition was configured
// without the guillotine option.
func (b *BayesianEvent) getGuillotineState() bool {
	return b.rawCondition.Guillotine && b.guillotineState
}
// triggerGuillotine records that the condition has evaluated to true. The flag
// is set unconditionally; getGuillotineState only reports it when the
// guillotine option is enabled for the condition.
func (b *BayesianEvent) triggerGuillotine() {
b.guillotineState = true
}
// compileCondition resolves the compiled program for this condition, using the
// shared conditionalExprCache when the expression has been compiled before.
//
// Locking: the caller visible in this file (OnBucketInit) invokes this method
// with conditionalExprCacheLock held. On a cache hit the lock is left
// untouched. On a miss the lock is released for the duration of the
// compilation and re-acquired before the cache is written. If compilation
// fails, the method returns with the lock released.
func (b *BayesianEvent) compileCondition() error {
	source := b.rawCondition.ConditionalFilterName

	if cached, found := conditionalExprCache[source]; found {
		b.conditionalFilterRuntime = &cached
		return nil
	}

	// release the lock during compile, same as the conditional bucket
	conditionalExprCacheLock.Unlock()

	env := map[string]interface{}{"queue": &Queue{}, "leaky": &Leaky{}, "evt": &types.Event{}}
	program, err := expr.Compile(source, exprhelpers.GetExprOptions(env)...)
	if err != nil {
		return fmt.Errorf("bayesian condition compile error: %w", err)
	}

	b.conditionalFilterRuntime = program

	conditionalExprCacheLock.Lock()
	conditionalExprCache[source] = *program

	return nil
}

View file

@ -191,6 +191,10 @@ func FromFactory(bucketFactory BucketFactory) *Leaky {
l.conditionalOverflow = true
l.Duration = l.BucketConfig.leakspeed
}
if l.BucketConfig.Type == "bayesian" {
l.Duration = l.BucketConfig.leakspeed
}
return l
}

View file

@ -51,6 +51,9 @@ type BucketFactory struct {
Profiling bool `yaml:"profiling"` //Profiling, if true, will make the bucket record pours/overflows/etc.
OverflowFilter string `yaml:"overflow_filter"` //OverflowFilter if present, is a filter that must return true for the overflow to go through
ConditionalOverflow string `yaml:"condition"` //condition if present, is an expression that must return true for the bucket to overflow
BayesianPrior float32 `yaml:"bayesian_prior"`
BayesianThreshold float32 `yaml:"bayesian_threshold"`
BayesianConditions []RawBayesianCondition `yaml:"bayesian_conditions"` //conditions for the bayesian bucket
ScopeType types.ScopeType `yaml:"scope,omitempty"` //to enforce a different remediation than blocking an IP. Will default this to IP
BucketName string `yaml:"-"`
Filename string `yaml:"-"`
@ -120,6 +123,25 @@ func ValidateFactory(bucketFactory *BucketFactory) error {
if bucketFactory.leakspeed == 0 {
return fmt.Errorf("bad leakspeed for conditional bucket '%s'", bucketFactory.LeakSpeed)
}
} else if bucketFactory.Type == "bayesian" {
if bucketFactory.BayesianConditions == nil {
return fmt.Errorf("bayesian bucket must have bayesian conditions")
}
if bucketFactory.BayesianPrior == 0 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
}
if bucketFactory.BayesianThreshold == 0 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
}
if bucketFactory.BayesianPrior > 1 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero prior")
}
if bucketFactory.BayesianThreshold > 1 {
return fmt.Errorf("bayesian bucket must have a valid, non-zero threshold")
}
if bucketFactory.Capacity != -1 {
return fmt.Errorf("bayesian bucket must have capacity -1")
}
} else {
return fmt.Errorf("unknown bucket type '%s'", bucketFactory.Type)
}
@ -316,6 +338,8 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
case "conditional":
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
case "bayesian":
bucketFactory.processors = append(bucketFactory.processors, &DumbProcessor{})
default:
return fmt.Errorf("invalid type '%s' in %s : %v", bucketFactory.Type, bucketFactory.Filename, err)
}
@ -355,6 +379,11 @@ func LoadBucket(bucketFactory *BucketFactory, tomb *tomb.Tomb) error {
bucketFactory.processors = append(bucketFactory.processors, &ConditionalOverflow{})
}
if bucketFactory.BayesianThreshold != 0 {
bucketFactory.logger.Tracef("Adding bayesian processor")
bucketFactory.processors = append(bucketFactory.processors, &BayesianBucket{})
}
if len(bucketFactory.Data) > 0 {
for _, data := range bucketFactory.Data {
if data.DestPath == "" {

View file

@ -119,3 +119,25 @@ func TestCounterBucketsConfig(t *testing.T) {
}
}
func TestBayesianBucketsConfig(t *testing.T) {
var CfgTests = []cfgTest{
//basic valid counter
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, true, true},
//bad capacity
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: 1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//missing prior
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//missing threshold
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//bad prior
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 1.5, BayesianThreshold: 0.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
//bad threshold
{BucketFactory{Name: "test", Description: "test1", Type: "bayesian", Capacity: -1, Filter: "true", BayesianPrior: 0.5, BayesianThreshold: 1.5, BayesianConditions: []RawBayesianCondition{{ConditionalFilterName: "true", ProbGivenEvil: 0.5, ProbGivenBenign: 0.5}}}, false, false},
}
if err := runTest(CfgTests); err != nil {
t.Fatalf("%s", err)
}
}

View file

@ -0,0 +1,21 @@
# Test scenario for the bayesian bucket type with the guillotine option enabled
# on both conditions. Events are grouped by source IP; inference starts from a
# prior of 0.5 and the bucket overflows once the posterior exceeds 0.8.
type: bayesian
name: test/guillotine-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: evt.Meta.http_path == "/"
prob_given_evil: 0.8
prob_given_benign: 0.2
guillotine : true
- condition: evt.Meta.ssh_user == "admin"
prob_given_evil: 0.9
prob_given_benign: 0.5
guillotine : true
# for bayesian buckets the leakspeed is used as the bucket duration
leakspeed: 30s
# bayesian buckets must be declared with capacity -1
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,50 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/guillotine-bayesian",
"events_count": 2
}
}
}
]
}

View file

@ -0,0 +1,21 @@
# Test scenario for the bayesian bucket type with guillotine-enabled conditions,
# grouped by source IP. Inference starts from a prior of 0.5 and the bucket
# overflows once the posterior exceeds 0.8.
type: bayesian
name: test/multiple-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: evt.Meta.http_path == "/"
prob_given_evil: 0.8
prob_given_benign: 0.2
guillotine : true
- condition: evt.Meta.ssh_user == "admin"
prob_given_evil: 0.9
prob_given_benign: 0.5
guillotine : true
# for bayesian buckets the leakspeed is used as the bucket duration
leakspeed: 30s
# bayesian buckets must be declared with capacity -1
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,64 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "1.2.3.4",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/multiple-bayesian",
"events_count": 2
}
}
}
]
}

View file

@ -0,0 +1,19 @@
# Test scenario for the bayesian bucket type without the guillotine option:
# each condition scans the whole bucket queue on every pour. Inference starts
# from a prior of 0.5 and the bucket overflows once the posterior exceeds 0.8.
type: bayesian
name: test/simple-bayesian
debug: true
description: "bayesian bucket"
filter: "evt.Meta.log_type == 'http_access-log' || evt.Meta.log_type == 'ssh_access-log'"
groupby: evt.Meta.source_ip
bayesian_prior: 0.5
bayesian_threshold: 0.8
bayesian_conditions:
- condition: any(queue.Queue, {.Meta.http_path == "/"})
prob_given_evil: 0.8
prob_given_benign: 0.2
- condition: any(queue.Queue, {.Meta.ssh_user == "admin"})
prob_given_evil: 0.9
prob_given_benign: 0.5
# for bayesian buckets the leakspeed is used as the bucket duration
leakspeed: 30s
# bayesian buckets must be declared with capacity -1
capacity: -1
labels:
type: overflow_1

View file

@ -0,0 +1 @@
- filename: {{.TestDirectory}}/bucket.yaml

View file

@ -0,0 +1,50 @@
{
"lines": [
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "http_access-log",
"http_path": "/"
}
},
{
"Line": {
"Labels": {
"type": "nginx"
},
"Raw": "don't care"
},
"MarshaledTime": "2020-01-01T10:00:00.000Z",
"Meta": {
"source_ip": "2a00:1450:4007:816::200e",
"log_type": "ssh_access-log",
"ssh_user": "admin"
}
}
],
"results": [
{
"Type" : 1,
"Alert": {
"sources" : {
"2a00:1450:4007:816::200e": {
"ip": "2a00:1450:4007:816::200e",
"scope": "Ip",
"value": "2a00:1450:4007:816::200e"
}
},
"Alert" : {
"scenario": "test/simple-bayesian",
"events_count": 2
}
}
}
]
}