From 3989f91075d3ba33aebd7a490407865fe9d8da4b Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Wed, 6 May 2020 09:35:13 -0700 Subject: [PATCH] logfile: Check if log is closed on close error during rotate This prevents getting into a situation where a container log cannot make progress because we tried to rotate a file, got an error, and now the file is closed. The next time we try to write a log entry it will try and rotate again but error that the file is already closed. I wonder if there is more we can do to beef up this rotation logic. Found this issue while investigating missing logs with errors in the docker daemon logs like: ``` Failed to log message for json-file: error closing file: close : file already closed ``` I'm not sure why the original rotation failed since the data was no longer available. Signed-off-by: Brian Goff --- daemon/logger/loggerutils/logfile.go | 7 ++- daemon/logger/loggerutils/logfile_test.go | 64 +++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/daemon/logger/loggerutils/logfile.go b/daemon/logger/loggerutils/logfile.go index 88e2b5f6bb..8028dd335d 100644 --- a/daemon/logger/loggerutils/logfile.go +++ b/daemon/logger/loggerutils/logfile.go @@ -188,8 +188,11 @@ func (w *LogFile) checkCapacityAndRotate() error { w.rotateMu.Lock() fname := w.f.Name() if err := w.f.Close(); err != nil { - w.rotateMu.Unlock() - return errors.Wrap(err, "error closing file") + // if there was an error during a prior rotate, the file could already be closed + if !errors.Is(err, os.ErrClosed) { + w.rotateMu.Unlock() + return errors.Wrap(err, "error closing file") + } } if err := rotate(fname, w.maxFiles, w.compress); err != nil { w.rotateMu.Unlock() diff --git a/daemon/logger/loggerutils/logfile_test.go b/daemon/logger/loggerutils/logfile_test.go index 8e3ce1aa41..aee1ac7849 100644 --- a/daemon/logger/loggerutils/logfile_test.go +++ b/daemon/logger/loggerutils/logfile_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/docker/docker/daemon/logger" + "github.com/docker/docker/pkg/pubsub" "github.com/docker/docker/pkg/tailfile" "gotest.tools/v3/assert" ) @@ -235,3 +236,66 @@ func TestFollowLogsProducerGone(t *testing.T) { default: } } + +func TestCheckCapacityAndRotate(t *testing.T) { + dir, err := ioutil.TempDir("", t.Name()) + assert.NilError(t, err) + defer os.RemoveAll(dir) + + f, err := ioutil.TempFile(dir, "log") + assert.NilError(t, err) + + l := &LogFile{ + f: f, + capacity: 5, + maxFiles: 3, + compress: true, + notifyRotate: pubsub.NewPublisher(0, 1), + perms: 0600, + marshal: func(msg *logger.Message) ([]byte, error) { + return msg.Line, nil + }, + } + defer l.Close() + + assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")})) + + dStringer := dirStringer{dir} + + _, err = os.Stat(f.Name() + ".1") + assert.Assert(t, os.IsNotExist(err), dStringer) + + assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")})) + _, err = os.Stat(f.Name() + ".1") + assert.NilError(t, err, dStringer) + + assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")})) + _, err = os.Stat(f.Name() + ".1") + assert.NilError(t, err, dStringer) + _, err = os.Stat(f.Name() + ".2.gz") + assert.NilError(t, err, dStringer) + + // Now let's simulate a failed rotation where the file was able to be closed but something else happened elsewhere + // down the line. + // We want to make sure that we can recover in the case that `l.f` was closed while attempting a rotation. + l.f.Close() + assert.NilError(t, l.WriteLogEntry(&logger.Message{Line: []byte("hello world!")})) +} + +type dirStringer struct { + d string +} + +func (d dirStringer) String() string { + ls, err := ioutil.ReadDir(d.d) + if err != nil { + return "" + } + var s strings.Builder + s.WriteString("\n") + + for _, fi := range ls { + s.WriteString(fi.Name() + "\n") + } + return s.String() +}