add test and fix for configuration reload (#1808)
Fix reload issue by returning the new configuration to the signal loop. Example: run crowdsec, disable the agent in the config file, then reload the configuration. Now there is no agent, but the signal loop still believes there is one, so triggering a configuration reload again makes the process hang forever. This commit updates the configuration held by the signal loop with the one returned by the signal handler.
This commit is contained in:
parent
7144dca68a
commit
708fa8280a
6 changed files with 167 additions and 40 deletions
|
@ -8,42 +8,45 @@ import (
|
|||
|
||||
"github.com/coreos/go-systemd/daemon"
|
||||
"github.com/pkg/errors"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/tomb.v2"
|
||||
|
||||
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/database"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/exprhelpers"
|
||||
leaky "github.com/crowdsecurity/crowdsec/pkg/leakybucket"
|
||||
"github.com/crowdsecurity/crowdsec/pkg/types"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/tomb.v2"
|
||||
//"github.com/sevlyar/go-daemon"
|
||||
)
|
||||
|
||||
//nolint: deadcode,unused // debugHandler is kept as a dev convenience : it shuts down and serialize internal state
|
||||
//nolint: deadcode,unused // debugHandler is kept as a dev convenience: it shuts down and serialize internal state
|
||||
func debugHandler(sig os.Signal, cConfig *csconfig.Config) error {
|
||||
var tmpFile string
|
||||
var err error
|
||||
//stop go routines
|
||||
if err := ShutdownCrowdsecRoutines(); err != nil {
|
||||
var (
|
||||
tmpFile string
|
||||
err error
|
||||
)
|
||||
|
||||
// stop goroutines
|
||||
if err = ShutdownCrowdsecRoutines(); err != nil {
|
||||
log.Warningf("Failed to shut down routines: %s", err)
|
||||
}
|
||||
//todo : properly stop acquis with the tail readers
|
||||
|
||||
// todo: properly stop acquis with the tail readers
|
||||
if tmpFile, err = leaky.DumpBucketsStateAt(time.Now().UTC(), cConfig.Crowdsec.BucketStateDumpDir, buckets); err != nil {
|
||||
log.Warningf("Failed dumping bucket state : %s", err)
|
||||
}
|
||||
|
||||
if err := leaky.ShutdownAllBuckets(buckets); err != nil {
|
||||
log.Warningf("while shutting down routines : %s", err)
|
||||
}
|
||||
|
||||
log.Printf("shutdown is finished buckets are in %s", tmpFile)
|
||||
return nil
|
||||
}
|
||||
|
||||
func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
|
||||
func reloadHandler(sig os.Signal) (*csconfig.Config, error) {
|
||||
var tmpFile string
|
||||
var err error
|
||||
/*
|
||||
re-init tombs
|
||||
*/
|
||||
|
||||
// re-initialize tombs
|
||||
acquisTomb = tomb.Tomb{}
|
||||
parsersTomb = tomb.Tomb{}
|
||||
bucketsTomb = tomb.Tomb{}
|
||||
|
@ -52,18 +55,21 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
|
|||
crowdsecTomb = tomb.Tomb{}
|
||||
pluginTomb = tomb.Tomb{}
|
||||
|
||||
cConfig, err = csconfig.NewConfig(flags.ConfigFile, flags.DisableAgent, flags.DisableAPI)
|
||||
cConfig, err := csconfig.NewConfig(flags.ConfigFile, flags.DisableAgent, flags.DisableAPI)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := LoadConfig(cConfig); err != nil {
|
||||
return err
|
||||
if err = LoadConfig(cConfig); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Configure logging
|
||||
if err = types.SetDefaultLoggerConfig(cConfig.Common.LogMedia, cConfig.Common.LogDir, *cConfig.Common.LogLevel,
|
||||
cConfig.Common.LogMaxSize, cConfig.Common.LogMaxFiles, cConfig.Common.LogMaxAge, cConfig.Common.CompressLogs, cConfig.Common.ForceColorLogs); err != nil {
|
||||
return err
|
||||
if err = types.SetDefaultLoggerConfig(cConfig.Common.LogMedia,
|
||||
cConfig.Common.LogDir, *cConfig.Common.LogLevel,
|
||||
cConfig.Common.LogMaxSize, cConfig.Common.LogMaxFiles,
|
||||
cConfig.Common.LogMaxAge, cConfig.Common.CompressLogs,
|
||||
cConfig.Common.ForceColorLogs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !cConfig.DisableAPI {
|
||||
|
@ -73,7 +79,7 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
|
|||
}
|
||||
apiServer, err := initAPIServer(cConfig)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "unable to init api server")
|
||||
return nil, errors.Wrap(err, "unable to init api server")
|
||||
}
|
||||
|
||||
apiReady := make(chan bool, 1)
|
||||
|
@ -83,29 +89,32 @@ func reloadHandler(sig os.Signal, cConfig *csconfig.Config) error {
|
|||
if !cConfig.DisableAgent {
|
||||
csParsers, err := initCrowdsec(cConfig)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "unable to init crowdsec")
|
||||
return nil, errors.Wrap(err, "unable to init crowdsec")
|
||||
}
|
||||
//restore bucket state
|
||||
|
||||
// restore bucket state
|
||||
if tmpFile != "" {
|
||||
log.Warningf("we are now using %s as a state file", tmpFile)
|
||||
cConfig.Crowdsec.BucketStateFile = tmpFile
|
||||
}
|
||||
//reload the simulation state
|
||||
|
||||
// reload the simulation state
|
||||
if err := cConfig.LoadSimulation(); err != nil {
|
||||
log.Errorf("reload error (simulation) : %s", err)
|
||||
}
|
||||
|
||||
agentReady := make(chan bool, 1)
|
||||
serveCrowdsec(csParsers, cConfig, agentReady)
|
||||
}
|
||||
|
||||
log.Printf("Reload is finished")
|
||||
//delete the tmp file, it's safe now :)
|
||||
// delete the tmp file, it's safe now :)
|
||||
if tmpFile != "" {
|
||||
if err := os.Remove(tmpFile); err != nil {
|
||||
log.Warningf("Failed to delete temp file (%s) : %s", tmpFile, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return cConfig, nil
|
||||
}
|
||||
|
||||
func ShutdownCrowdsecRoutines() error {
|
||||
|
@ -115,34 +124,40 @@ func ShutdownCrowdsecRoutines() error {
|
|||
if len(dataSources) > 0 {
|
||||
acquisTomb.Kill(nil)
|
||||
log.Debugf("waiting for acquisition to finish")
|
||||
|
||||
if err := acquisTomb.Wait(); err != nil {
|
||||
log.Warningf("Acquisition returned error : %s", err)
|
||||
reterr = err
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("acquisition is finished, wait for parser/bucket/ouputs.")
|
||||
parsersTomb.Kill(nil)
|
||||
if err := parsersTomb.Wait(); err != nil {
|
||||
log.Warningf("Parsers returned error : %s", err)
|
||||
reterr = err
|
||||
}
|
||||
|
||||
log.Debugf("parsers is done")
|
||||
time.Sleep(1 * time.Second) //ugly workaround for now to ensure PourItemtoholders are finished
|
||||
time.Sleep(1 * time.Second) // ugly workaround for now to ensure PourItemtoholders are finished
|
||||
bucketsTomb.Kill(nil)
|
||||
|
||||
if err := bucketsTomb.Wait(); err != nil {
|
||||
log.Warningf("Buckets returned error : %s", err)
|
||||
reterr = err
|
||||
}
|
||||
|
||||
log.Debugf("buckets is done")
|
||||
time.Sleep(1 * time.Second) //ugly workaround for now
|
||||
time.Sleep(1 * time.Second) // ugly workaround for now
|
||||
outputsTomb.Kill(nil)
|
||||
|
||||
if err := outputsTomb.Wait(); err != nil {
|
||||
log.Warningf("Ouputs returned error : %s", err)
|
||||
reterr = err
|
||||
|
||||
}
|
||||
|
||||
log.Debugf("outputs are done")
|
||||
//everything is dead johny
|
||||
// He's dead, Jim.
|
||||
crowdsecTomb.Kill(nil)
|
||||
|
||||
return reterr
|
||||
|
@ -151,9 +166,11 @@ func ShutdownCrowdsecRoutines() error {
|
|||
func shutdownAPI() error {
|
||||
log.Debugf("shutting down api via Tomb")
|
||||
apiTomb.Kill(nil)
|
||||
|
||||
if err := apiTomb.Wait(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debugf("done")
|
||||
return nil
|
||||
}
|
||||
|
@ -161,9 +178,11 @@ func shutdownAPI() error {
|
|||
func shutdownCrowdsec() error {
|
||||
log.Debugf("shutting down crowdsec via Tomb")
|
||||
crowdsecTomb.Kill(nil)
|
||||
|
||||
if err := crowdsecTomb.Wait(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debugf("done")
|
||||
return nil
|
||||
}
|
||||
|
@ -174,23 +193,33 @@ func shutdown(sig os.Signal, cConfig *csconfig.Config) error {
|
|||
return errors.Wrap(err, "failed to shut down crowdsec")
|
||||
}
|
||||
}
|
||||
|
||||
if !cConfig.DisableAPI {
|
||||
if err := shutdownAPI(); err != nil {
|
||||
return errors.Wrap(err, "failed to shut down api routines")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func HandleSignals(cConfig *csconfig.Config) error {
|
||||
var (
|
||||
newConfig *csconfig.Config
|
||||
err error
|
||||
)
|
||||
|
||||
signalChan := make(chan os.Signal, 1)
|
||||
//We add os.Interrupt mostly to ease windows dev, it allows to simulate a clean shutdown when running in the console
|
||||
|
||||
// We add os.Interrupt mostly to ease windows development,
|
||||
// it allows to simulate a clean shutdown when running in the console
|
||||
signal.Notify(signalChan,
|
||||
syscall.SIGHUP,
|
||||
syscall.SIGTERM,
|
||||
os.Interrupt)
|
||||
|
||||
exitChan := make(chan error)
|
||||
|
||||
go func() {
|
||||
defer types.CatchPanic("crowdsec/HandleSignals")
|
||||
Loop:
|
||||
|
@ -200,19 +229,28 @@ func HandleSignals(cConfig *csconfig.Config) error {
|
|||
// kill -SIGHUP XXXX
|
||||
case syscall.SIGHUP:
|
||||
log.Warning("SIGHUP received, reloading")
|
||||
if err := shutdown(s, cConfig); err != nil {
|
||||
|
||||
if err = shutdown(s, cConfig); err != nil {
|
||||
exitChan <- errors.Wrap(err, "failed shutdown")
|
||||
|
||||
break Loop
|
||||
}
|
||||
if err := reloadHandler(s, cConfig); err != nil {
|
||||
|
||||
if newConfig, err = reloadHandler(s); err != nil {
|
||||
exitChan <- errors.Wrap(err, "reload handler failure")
|
||||
|
||||
break Loop
|
||||
}
|
||||
|
||||
if newConfig != nil {
|
||||
cConfig = newConfig
|
||||
}
|
||||
// ctrl+C, kill -SIGINT XXXX, kill -SIGTERM XXXX
|
||||
case os.Interrupt, syscall.SIGTERM:
|
||||
log.Warning("SIGTERM received, shutting down")
|
||||
if err := shutdown(s, cConfig); err != nil {
|
||||
if err = shutdown(s, cConfig); err != nil {
|
||||
exitChan <- errors.Wrap(err, "failed shutdown")
|
||||
|
||||
break Loop
|
||||
}
|
||||
exitChan <- nil
|
||||
|
@ -220,7 +258,7 @@ func HandleSignals(cConfig *csconfig.Config) error {
|
|||
}
|
||||
}()
|
||||
|
||||
err := <-exitChan
|
||||
err = <-exitChan
|
||||
if err == nil {
|
||||
log.Warning("Crowdsec service shutting down")
|
||||
}
|
||||
|
@ -241,6 +279,7 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
|
|||
if err != nil {
|
||||
return errors.Wrap(err, "failed to get database client")
|
||||
}
|
||||
|
||||
err = exprhelpers.Init(dbClient)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to init expr helpers")
|
||||
|
@ -250,6 +289,7 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
|
|||
if err != nil {
|
||||
return errors.Wrap(err, "failed to init expr helpers")
|
||||
}
|
||||
|
||||
log.Warningln("Exprhelpers loaded without database client.")
|
||||
}
|
||||
|
||||
|
@ -257,14 +297,17 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
|
|||
if cConfig.API.Server.OnlineClient == nil || cConfig.API.Server.OnlineClient.Credentials == nil {
|
||||
log.Warningf("Communication with CrowdSec Central API disabled from configuration file")
|
||||
}
|
||||
|
||||
if flags.DisableCAPI {
|
||||
log.Warningf("Communication with CrowdSec Central API disabled from args")
|
||||
cConfig.API.Server.OnlineClient = nil
|
||||
}
|
||||
|
||||
apiServer, err := initAPIServer(cConfig)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "api server init")
|
||||
}
|
||||
|
||||
if !flags.TestMode {
|
||||
serveAPIServer(apiServer, apiReady)
|
||||
}
|
||||
|
@ -277,7 +320,8 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
|
|||
if err != nil {
|
||||
return errors.Wrap(err, "crowdsec init")
|
||||
}
|
||||
/* if it's just linting, we're done */
|
||||
|
||||
// if it's just linting, we're done
|
||||
if !flags.TestMode {
|
||||
serveCrowdsec(csParsers, cConfig, agentReady)
|
||||
}
|
||||
|
@ -296,7 +340,8 @@ func Serve(cConfig *csconfig.Config, apiReady chan bool, agentReady chan bool) e
|
|||
if !sent || err != nil {
|
||||
log.Errorf("Failed to notify(sent: %v): %v", sent, err)
|
||||
}
|
||||
/*wait for signals*/
|
||||
|
||||
// wait for signals
|
||||
return HandleSignals(cConfig)
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ teardown_file() {
|
|||
|
||||
setup() {
|
||||
load "../lib/setup.sh"
|
||||
load "../lib/bats-file/load.bash"
|
||||
./instance-data load
|
||||
}
|
||||
|
||||
|
@ -50,7 +51,7 @@ teardown() {
|
|||
@test "crowdsec - print error on exit" {
|
||||
# errors that cause program termination are printed to stderr, not only logs
|
||||
config_set '.db_config.type="meh"'
|
||||
run -1 --separate-stderr "${BIN_DIR}/crowdsec"
|
||||
run -1 --separate-stderr "${CROWDSEC}"
|
||||
refute_output
|
||||
assert_stderr --partial "unable to create database client: unknown database type 'meh'"
|
||||
}
|
||||
|
@ -59,3 +60,72 @@ teardown() {
|
|||
CS_LAPI_SECRET=foo run -1 --separate-stderr timeout 2s "${CROWDSEC}"
|
||||
assert_stderr --partial "api server init: unable to run local API: controller init: CS_LAPI_SECRET not strong enough"
|
||||
}
|
||||
|
||||
@test "crowdsec - reload (change of logfile, disabled agent)" {
|
||||
logdir1=$(TMPDIR="${BATS_TEST_TMPDIR}" mktemp -u)
|
||||
log_old="${logdir1}/crowdsec.log"
|
||||
config_set ".common.log_dir=\"${logdir1}\""
|
||||
|
||||
run -0 ./instance-crowdsec start
|
||||
# PID="$output"
|
||||
assert_file_exist "$log_old"
|
||||
assert_file_contains "$log_old" "Starting processing data"
|
||||
|
||||
logdir2=$(TMPDIR="${BATS_TEST_TMPDIR}" mktemp -u)
|
||||
log_new="${logdir2}/crowdsec.log"
|
||||
config_set ".common.log_dir=\"${logdir2}\""
|
||||
|
||||
config_disable_agent
|
||||
|
||||
sleep 5
|
||||
|
||||
# this won't work as crowdsec-wrapper does not relay the signal
|
||||
# run -0 kill -HUP "$PID"
|
||||
|
||||
run killall -HUP "$BIN_DIR/crowdsec.cover"
|
||||
run killall -HUP "$BIN_DIR/crowdsec"
|
||||
|
||||
for ((i=0; i<20; i++)); do
|
||||
sleep 1
|
||||
grep -q "killing all plugins" <"$log_old" && break
|
||||
done
|
||||
|
||||
echo "waited $i seconds"
|
||||
|
||||
echo
|
||||
echo "OLD LOG"
|
||||
echo
|
||||
ls -la "$log_old" || true
|
||||
cat "$log_old" || true
|
||||
|
||||
assert_file_contains "$log_old" "SIGHUP received, reloading"
|
||||
assert_file_contains "$log_old" "Crowdsec engine shutting down"
|
||||
assert_file_contains "$log_old" "Killing parser routines"
|
||||
assert_file_contains "$log_old" "Bucket routine exiting"
|
||||
assert_file_contains "$log_old" "serve: shutting down api server"
|
||||
assert_file_contains "$log_old" "plugingTomb dying"
|
||||
assert_file_contains "$log_old" "killing all plugins"
|
||||
|
||||
sleep 5
|
||||
|
||||
assert_file_exist "$log_new"
|
||||
|
||||
for ((i=0; i<20; i++)); do
|
||||
sleep 1
|
||||
grep -q "Reload is finished" <"$log_old" && break
|
||||
done
|
||||
|
||||
echo "waited $i seconds"
|
||||
|
||||
echo
|
||||
echo "NEW LOG"
|
||||
echo
|
||||
ls -la "$log_new" || true
|
||||
cat "$log_new" || true
|
||||
|
||||
assert_file_contains "$log_new" "CrowdSec Local API listening on 127.0.0.1:8080"
|
||||
assert_file_contains "$log_new" "Reload is finished"
|
||||
|
||||
run -0 ./instance-crowdsec stop
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,15 @@ THIS_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
|
|||
set -o pipefail # don't let sed hide the statuscode
|
||||
mkdir -p "${LOCAL_DIR}/var/lib/coverage"
|
||||
|
||||
# this would be nice but doesn't work, since the binary is not running in background
|
||||
#_hup() {
|
||||
# echo "killall -1 crowdsec.cover"
|
||||
# killall -HUP crowdsec.cover
|
||||
#}
|
||||
#
|
||||
## relay the "configuration reload" signal
|
||||
#trap _hup SIGHUP
|
||||
|
||||
# we collect rc and output by hand, because setting -o pipefail would trigger a
|
||||
# SIGPIPE.
|
||||
set +e
|
||||
|
|
|
@ -37,6 +37,7 @@ start() {
|
|||
-o "${LOG_DIR}/crowdsec.out" \
|
||||
"${CROWDSEC}"
|
||||
./bin/wait-for-port 6060
|
||||
cat "$DAEMON_PID"
|
||||
}
|
||||
|
||||
stop() {
|
||||
|
@ -48,6 +49,7 @@ stop() {
|
|||
if [[ -n "${PGID}" ]]; then
|
||||
kill -- "-${PGID}"
|
||||
fi
|
||||
|
||||
rm -f -- "${DAEMON_PID}"
|
||||
fi
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ fi
|
|||
start() {
|
||||
systemctl start crowdsec
|
||||
./bin/wait-for-port 6060
|
||||
pidof /usr/bin/crowdsec
|
||||
}
|
||||
|
||||
stop() {
|
||||
|
|
|
@ -155,7 +155,7 @@ export -f assert_json
|
|||
# to check if something was passed by mistake, since if you read it, it will be
|
||||
# incomplete.
|
||||
is_stdin_empty() {
|
||||
if read -t 0.1; then
|
||||
if read -r -t 0.1; then
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
|
|
Loading…
Reference in a new issue