add test and fix for configuration reload (#1808)

fix reload issue by returning new configuration to the signal loop

example: run crowdsec, disable the agent in the config file, then reload
the configuration. Now there is no agent, but the signal loop still believes
there is one, so triggering a configuration reload again will make the
process hang forever.
This commit updates the configuration held by the signal loop with the one
returned by the reload handler.
This commit is contained in:
mmetc 2022-10-14 15:48:41 +02:00 committed by GitHub
parent 7144dca68a
commit 708fa8280a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 167 additions and 40 deletions

View file

@ -8,42 +8,45 @@ import (
"github.com/coreos/go-systemd/daemon"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"gopkg.in/tomb.v2"
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
"github.com/crowdsecurity/crowdsec/pkg/database"
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
"github.com/crowdsecurity/crowdsec/pkg/types"
log "github.com/sirupsen/logrus"
"gopkg.in/tomb.v2"
//"github.com/sevlyar/go-daemon"
)
//nolint: deadcode,unused // debugHandler is kept as a dev convenience : it shuts down and serialize internal state
//nolint: deadcode,unused // debugHandler is kept as a dev convenience: it shuts down and serialize internal state
func debugHandler(sig os.Signal, cConfig *csconfig.Config) error {
var tmpFile string
var err error
//stop go routines
if err := ShutdownCrowdsecRoutines(); err != nil {
var (
tmpFile string
err error
)
// stop goroutines
if err = ShutdownCrowdsecRoutines(); err != nil {
log.Warningf("Failed to shut down routines: %s", err)
}
//todo : properly stop acquis with the tail readers
// todo: properly stop acquis with the tail readers
if tmpFile, err = leaky.DumpBucketsStateAt(time.Now().UTC(), cConfig.Crowdsec.BucketStateDumpDir, buckets); err != nil {
log.Warningf("Failed dumping bucket state : %s", err)
}
if err := leaky.ShutdownAllBuckets(buckets); err != nil {
log.Warningf("while shutting down routines : %s", err)
}
log.Printf("shutdown is finished buckets are in %s", tmpFile)
return nil
}
func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
func reloadHandler(sig os.Signal) (*csconfig.Config, error) {
var tmpFile string
var err error
/*
re-init tombs
*/
// re-initialize tombs
acquisTomb = tomb.Tomb{}
parsersTomb = tomb.Tomb{}
bucketsTomb = tomb.Tomb{}
@ -52,18 +55,21 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
crowdsecTomb = tomb.Tomb{}
pluginTomb = tomb.Tomb{}
cConfig, err = csconfig.NewConfig(flags.ConfigFile, flags.DisableAgent, flags.DisableAPI)
cConfig, err := csconfig.NewConfig(flags.ConfigFile, flags.DisableAgent, flags.DisableAPI)
if err != nil {
return err
return nil, err
}
if err := LoadConfig(cConfig); err != nil {
return err
if err = LoadConfig(cConfig); err != nil {
return nil, err
}
// Configure logging
if err = types.SetDefaultLoggerConfig(cConfig.Common.LogMedia, cConfig.Common.LogDir, *cConfig.Common.LogLevel,
cConfig.Common.LogMaxSize, cConfig.Common.LogMaxFiles, cConfig.Common.LogMaxAge, cConfig.Common.CompressLogs, cConfig.Common.ForceColorLogs); err != nil {
return err
if err = types.SetDefaultLoggerConfig(cConfig.Common.LogMedia,
cConfig.Common.LogDir, *cConfig.Common.LogLevel,
cConfig.Common.LogMaxSize, cConfig.Common.LogMaxFiles,
cConfig.Common.LogMaxAge, cConfig.Common.CompressLogs,
cConfig.Common.ForceColorLogs); err != nil {
return nil, err
}
if !cConfig.DisableAPI {
@ -73,7 +79,7 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
}
apiServer, err := initAPIServer(cConfig)
if err != nil {
return errors.Wrap(err, "unable to init api server")
return nil, errors.Wrap(err, "unable to init api server")
}
apiReady := make(chan bool, 1)
@ -83,29 +89,32 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
if !cConfig.DisableAgent {
csParsers, err := initCrowdsec(cConfig)
if err != nil {
return errors.Wrap(err, "unable to init crowdsec")
return nil, errors.Wrap(err, "unable to init crowdsec")
}
//restore bucket state
// restore bucket state
if tmpFile != "" {
log.Warningf("we are now using %s as a state file", tmpFile)
cConfig.Crowdsec.BucketStateFile = tmpFile
}
//reload the simulation state
// reload the simulation state
if err := cConfig.LoadSimulation(); err != nil {
log.Errorf("reload error (simulation) : %s", err)
}
agentReady := make(chan bool, 1)
serveCrowdsec(csParsers, cConfig, agentReady)
}
log.Printf("Reload is finished")
//delete the tmp file, it's safe now :)
// delete the tmp file, it's safe now :)
if tmpFile != "" {
if err := os.Remove(tmpFile); err != nil {
log.Warningf("Failed to delete temp file (%s) : %s", tmpFile, err)
}
}
return nil
return cConfig, nil
}
func ShutdownCrowdsecRoutines() error {
@ -115,34 +124,40 @@ func ShutdownCrowdsecRoutines() error {
if len(dataSources) > 0 {
acquisTomb.Kill(nil)
log.Debugf("waiting for acquisition to finish")
if err := acquisTomb.Wait(); err != nil {
log.Warningf("Acquisition returned error : %s", err)
reterr = err
}
}
log.Debugf("acquisition is finished, wait for parser/bucket/ouputs.")
parsersTomb.Kill(nil)
if err := parsersTomb.Wait(); err != nil {
log.Warningf("Parsers returned error : %s", err)
reterr = err
}
log.Debugf("parsers is done")
time.Sleep(1 * time.Second) //ugly workaround for now to ensure PourItemtoholders are finished
time.Sleep(1 * time.Second) // ugly workaround for now to ensure PourItemtoholders are finished
bucketsTomb.Kill(nil)
if err := bucketsTomb.Wait(); err != nil {
log.Warningf("Buckets returned error : %s", err)
reterr = err
}
log.Debugf("buckets is done")
time.Sleep(1 * time.Second) //ugly workaround for now
time.Sleep(1 * time.Second) // ugly workaround for now
outputsTomb.Kill(nil)
if err := outputsTomb.Wait(); err != nil {
log.Warningf("Ouputs returned error : %s", err)
reterr = err
}
log.Debugf("outputs are done")
//everything is dead johny
// He's dead, Jim.
crowdsecTomb.Kill(nil)
return reterr
@ -151,9 +166,11 @@ func ShutdownCrowdsecRoutines() error {
func shutdownAPI() error {
log.Debugf("shutting down api via Tomb")
apiTomb.Kill(nil)
if err := apiTomb.Wait(); err != nil {
return err
}
log.Debugf("done")
return nil
}
@ -161,9 +178,11 @@ func shutdownAPI() error {
func shutdownCrowdsec() error {
log.Debugf("shutting down crowdsec via Tomb")
crowdsecTomb.Kill(nil)
if err := crowdsecTomb.Wait(); err != nil {
return err
}
log.Debugf("done")
return nil
}
@ -174,23 +193,33 @@ func shutdown(sig os.Signal, cConfig *csconfig.Config) error {
return errors.Wrap(err, "failed to shut down crowdsec")
}
}
if !cConfig.DisableAPI {
if err := shutdownAPI(); err != nil {
return errors.Wrap(err, "failed to shut down api routines")
}
}
return nil
}
func HandleSignals(cConfig *csconfig.Config) error {
var (
newConfig *csconfig.Config
err error
)
signalChan := make(chan os.Signal, 1)
//We add os.Interrupt mostly to ease windows dev, it allows to simulate a clean shutdown when running in the console
// We add os.Interrupt mostly to ease windows development,
// it allows to simulate a clean shutdown when running in the console
signal.Notify(signalChan,
syscall.SIGHUP,
syscall.SIGTERM,
os.Interrupt)
exitChan := make(chan error)
go func() {
defer types.CatchPanic("crowdsec/HandleSignals")
Loop:
@ -200,19 +229,28 @@ func HandleSignals(cConfig *csconfig.Config) error {
// kill -SIGHUP XXXX
case syscall.SIGHUP:
log.Warning("SIGHUP received, reloading")
if err := shutdown(s, cConfig); err != nil {
if err = shutdown(s, cConfig); err != nil {
exitChan <- errors.Wrap(err, "failed shutdown")
break Loop
}
if err := reloadHandler(s, cConfig); err != nil {
if newConfig, err = reloadHandler(s); err != nil {
exitChan <- errors.Wrap(err, "reload handler failure")
break Loop
}
if newConfig != nil {
cConfig = newConfig
}
// ctrl+C, kill -SIGINT XXXX, kill -SIGTERM XXXX
case os.Interrupt, syscall.SIGTERM:
log.Warning("SIGTERM received, shutting down")
if err := shutdown(s, cConfig); err != nil {
if err = shutdown(s, cConfig); err != nil {
exitChan <- errors.Wrap(err, "failed shutdown")
break Loop
}
exitChan <- nil
@ -220,7 +258,7 @@ func HandleSignals(cConfig *csconfig.Config) error {
}
}()
err := <-exitChan
err = <-exitChan
if err == nil {
log.Warning("Crowdsec service shutting down")
}
@ -241,6 +279,7 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
if err != nil {
return errors.Wrap(err, "failed to get database client")
}
err = exprhelpers.Init(dbClient)
if err != nil {
return errors.Wrap(err, "failed to init expr helpers")
@ -250,6 +289,7 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
if err != nil {
return errors.Wrap(err, "failed to init expr helpers")
}
log.Warningln("Exprhelpers loaded without database client.")
}
@ -257,14 +297,17 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
if cConfig.API.Server.OnlineClient == nil || cConfig.API.Server.OnlineClient.Credentials == nil {
log.Warningf("Communication with CrowdSec Central API disabled from configuration file")
}
if flags.DisableCAPI {
log.Warningf("Communication with CrowdSec Central API disabled from args")
cConfig.API.Server.OnlineClient = nil
}
apiServer, err := initAPIServer(cConfig)
if err != nil {
return errors.Wrap(err, "api server init")
}
if !flags.TestMode {
serveAPIServer(apiServer, apiReady)
}
@ -277,7 +320,8 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
if err != nil {
return errors.Wrap(err, "crowdsec init")
}
/* if it's just linting, we're done */
// if it's just linting, we're done
if !flags.TestMode {
serveCrowdsec(csParsers, cConfig, agentReady)
}
@ -296,7 +340,8 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
if !sent || err != nil {
log.Errorf("Failed to notify(sent: %v): %v", sent, err)
}
/*wait for signals*/
// wait for signals
return HandleSignals(cConfig)
}

View file

@ -13,6 +13,7 @@ teardown_file() {
setup() {
load "../lib/setup.sh"
load "../lib/bats-file/load.bash"
./instance-data load
}
@ -50,7 +51,7 @@ teardown() {
@test "crowdsec - print error on exit" {
# errors that cause program termination are printed to stderr, not only logs
config_set '.db_config.type="meh"'
run -1 --separate-stderr "${BIN_DIR}/crowdsec"
run -1 --separate-stderr "${CROWDSEC}"
refute_output
assert_stderr --partial "unable to create database client: unknown database type 'meh'"
}
@ -59,3 +60,72 @@ teardown() {
CS_LAPI_SECRET=foo run -1 --separate-stderr timeout 2s "${CROWDSEC}"
assert_stderr --partial "api server init: unable to run local API: controller init: CS_LAPI_SECRET not strong enough"
}
@test "crowdsec - reload (change of logfile, disabled agent)" {
logdir1=$(TMPDIR="${BATS_TEST_TMPDIR}" mktemp -u)
log_old="${logdir1}/crowdsec.log"
config_set ".common.log_dir=\"${logdir1}\""
run -0 ./instance-crowdsec start
# PID="$output"
assert_file_exist "$log_old"
assert_file_contains "$log_old" "Starting processing data"
logdir2=$(TMPDIR="${BATS_TEST_TMPDIR}" mktemp -u)
log_new="${logdir2}/crowdsec.log"
config_set ".common.log_dir=\"${logdir2}\""
config_disable_agent
sleep 5
# this won't work as crowdsec-wrapper does not relay the signal
# run -0 kill -HUP "$PID"
run killall -HUP "$BIN_DIR/crowdsec.cover"
run killall -HUP "$BIN_DIR/crowdsec"
for ((i=0; i<20; i++)); do
sleep 1
grep -q "killing all plugins" <"$log_old" && break
done
echo "waited $i seconds"
echo
echo "OLD LOG"
echo
ls -la "$log_old" || true
cat "$log_old" || true
assert_file_contains "$log_old" "SIGHUP received, reloading"
assert_file_contains "$log_old" "Crowdsec engine shutting down"
assert_file_contains "$log_old" "Killing parser routines"
assert_file_contains "$log_old" "Bucket routine exiting"
assert_file_contains "$log_old" "serve: shutting down api server"
assert_file_contains "$log_old" "plugingTomb dying"
assert_file_contains "$log_old" "killing all plugins"
sleep 5
assert_file_exist "$log_new"
for ((i=0; i<20; i++)); do
sleep 1
grep -q "Reload is finished" <"$log_old" && break
done
echo "waited $i seconds"
echo
echo "NEW LOG"
echo
ls -la "$log_new" || true
cat "$log_new" || true
assert_file_contains "$log_new" "CrowdSec Local API listening on 127.0.0.1:8080"
assert_file_contains "$log_new" "Reload is finished"
run -0 ./instance-crowdsec stop
}

View file

@ -15,6 +15,15 @@ THIS_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
set -o pipefail # don't let sed hide the statuscode
mkdir -p "${LOCAL_DIR}/var/lib/coverage"
# this would be nice but doesn't work, since the binary is not running in background
#_hup() {
# echo "killall -1 crowdsec.cover"
# killall -HUP crowdsec.cover
#}
#
## relay the "configuration reload" signal
#trap _hup SIGHUP
# we collect rc and output by hand, because setting -o pipefail would trigger a
# SIGPIPE.
set +e

View file

@ -37,6 +37,7 @@ start() {
-o "${LOG_DIR}/crowdsec.out" \
"${CROWDSEC}"
./bin/wait-for-port 6060
cat "$DAEMON_PID"
}
stop() {
@ -48,6 +49,7 @@ stop() {
if [[ -n "${PGID}" ]]; then
kill -- "-${PGID}"
fi
rm -f -- "${DAEMON_PID}"
fi
}

View file

@ -33,6 +33,7 @@ fi
start() {
systemctl start crowdsec
./bin/wait-for-port 6060
pidof /usr/bin/crowdsec
}
stop() {

View file

@ -155,7 +155,7 @@ export -f assert_json
# to check if something was passed by mistake, since if you read it, it will be
# incomplete.
is_stdin_empty() {
if read -t 0.1; then
if read -r -t 0.1; then
return 1
fi
return 0