Browse Source

daemon.ContainerLogs(): fix resource leak on follow

When daemon.ContainerLogs() is called with options.follow=true
(as in "docker logs --follow"), the "loggerutils.followLogs()"
function never returns (even when the logs consumer is gone).
As a result, all the resources associated with it (including
an opened file descriptor for the log file being read, two FDs
for a pipe, and two FDs for inotify watch) are never released.

If this is repeated (such as by running "docker logs --follow"
and pressing Ctrl-C a few times), this results in DoS caused by
either hitting the limit of inotify watches, or the limit of
opened files. The only cure is daemon restart.

Apparently, what happens is:

1. logs producer (a container) is gone, calling (*LogWatcher).Close()
for all its readers (daemon/logger/jsonfilelog/jsonfilelog.go:175).

2. WatchClose() is properly handled by a dedicated goroutine in
followLogs(), cancelling the context.

3. Upon receiving the ctx.Done(), the code in followLogs()
(daemon/logger/loggerutils/logfile.go#L626-L638) keeps
sending messages _synchronously_ (which is OK for now).

4. Logs consumer is gone (Ctrl-C is pressed on a terminal running
"docker logs --follow"). Method (*LogWatcher).Close() is properly
called (see daemon/logs.go:114). Since it was already called before,
once.Do() makes this call a no-op (which is kinda good, as
otherwise it would panic on closing a closed channel).

5. A goroutine (see item 3 above) keeps sending log messages
synchronously to the logWatcher.Msg channel. Since the
channel reader is gone, the channel send operation blocks forever,
and resource cleanup set up in defer statements at the beginning
of followLogs() never happens.

Alas, the fix is somewhat complicated:

1. Distinguish between close from logs producer and logs consumer.
To that effect,
 - yet another channel is added to the LogWatcher struct;
 - {Watch,}Close() are renamed to {Watch,}ProducerGone();
 - {Watch,}ConsumerGone() are added;

*NOTE* that ProducerGone()/WatchProducerGone() pair is ONLY needed
in order to stop ContainerLogs(follow=true) when a container is stopped;
otherwise we're not interested in it. In other words, we're only
using it in followLogs().

2. Code that was doing (*LogWatcher).Close() is modified to either call
ProducerGone() or ConsumerGone(), depending on the context.

3. Code that was waiting for WatchClose() is modified to wait for
either ConsumerGone() or ProducerGone(), or both, depending on the
context.

4. followLogs() is modified accordingly:
 - context cancellation is happening on WatchProducerGone(),
and once it's received the FileWatcher is closed and waitRead()
returns errDone on EOF (i.e. log rotation handling logic is disabled);
 - due to this, code that was writing synchronously to logWatcher.Msg
can be and is removed as the code above it handles this case;
 - function returns once ConsumerGone is received, freeing all the
resources -- this is the bugfix itself.

While at it,

1. Let's also remove the ctx usage to simplify the code a bit.
It was introduced by commit a69a59ffc7e3d ("Decouple removing the
fileWatcher from reading") in order to fix a bug. The bug was actually
a deadlock in fsnotify, and the fix was just a workaround. Since then
the fsnotify bug has been fixed, and a new fsnotify was vendored in.
For more details, please see
https://github.com/moby/moby/pull/27782#issuecomment-416794490

2. Since `(*filePoller).Close()` is fixed to remove all the files
being watched, there is no need to explicitly call
fileWatcher.Remove(name) anymore, so get rid of the extra code.

Should fix https://github.com/moby/moby/issues/37391

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Kir Kolyshkin 7 năm trước cách đây
mục cha
commit
916eabd459

+ 1 - 1
daemon/attach.go

@@ -123,7 +123,7 @@ func (daemon *Daemon) containerAttach(c *container.Container, cfg *stream.Attach
 			return logger.ErrReadLogsNotSupported{}
 			return logger.ErrReadLogsNotSupported{}
 		}
 		}
 		logs := cLog.ReadLogs(logger.ReadConfig{Tail: -1})
 		logs := cLog.ReadLogs(logger.ReadConfig{Tail: -1})
-		defer logs.Close()
+		defer logs.ConsumerGone()
 
 
 	LogLoop:
 	LogLoop:
 		for {
 		for {

+ 3 - 13
daemon/logger/adapter.go

@@ -93,21 +93,12 @@ func (a *pluginAdapterWithRead) ReadLogs(config ReadConfig) *LogWatcher {
 
 
 		dec := logdriver.NewLogEntryDecoder(stream)
 		dec := logdriver.NewLogEntryDecoder(stream)
 		for {
 		for {
-			select {
-			case <-watcher.WatchClose():
-				return
-			default:
-			}
-
 			var buf logdriver.LogEntry
 			var buf logdriver.LogEntry
 			if err := dec.Decode(&buf); err != nil {
 			if err := dec.Decode(&buf); err != nil {
 				if err == io.EOF {
 				if err == io.EOF {
 					return
 					return
 				}
 				}
-				select {
-				case watcher.Err <- errors.Wrap(err, "error decoding log message"):
-				case <-watcher.WatchClose():
-				}
+				watcher.Err <- errors.Wrap(err, "error decoding log message")
 				return
 				return
 			}
 			}
 
 
@@ -125,11 +116,10 @@ func (a *pluginAdapterWithRead) ReadLogs(config ReadConfig) *LogWatcher {
 				return
 				return
 			}
 			}
 
 
+			// send the message unless the consumer is gone
 			select {
 			select {
 			case watcher.Msg <- msg:
 			case watcher.Msg <- msg:
-			case <-watcher.WatchClose():
-				// make sure the message we consumed is sent
-				watcher.Msg <- msg
+			case <-watcher.WatchConsumerGone():
 				return
 				return
 			}
 			}
 		}
 		}

+ 1 - 1
daemon/logger/adapter_test.go

@@ -174,7 +174,7 @@ func TestAdapterReadLogs(t *testing.T) {
 		t.Fatal("timeout waiting for message channel to close")
 		t.Fatal("timeout waiting for message channel to close")
 
 
 	}
 	}
-	lw.Close()
+	lw.ProducerGone()
 
 
 	lw = lr.ReadLogs(ReadConfig{Follow: true})
 	lw = lr.ReadLogs(ReadConfig{Follow: true})
 	for _, x := range testMsg {
 	for _, x := range testMsg {

+ 2 - 2
daemon/logger/journald/read.go

@@ -165,7 +165,7 @@ func (s *journald) Close() error {
 	s.mu.Lock()
 	s.mu.Lock()
 	s.closed = true
 	s.closed = true
 	for reader := range s.readers.readers {
 	for reader := range s.readers.readers {
-		reader.Close()
+		reader.ProducerGone()
 	}
 	}
 	s.mu.Unlock()
 	s.mu.Unlock()
 	return nil
 	return nil
@@ -299,7 +299,7 @@ func (s *journald) followJournal(logWatcher *logger.LogWatcher, j *C.sd_journal,
 	// Wait until we're told to stop.
 	// Wait until we're told to stop.
 	select {
 	select {
 	case cursor = <-newCursor:
 	case cursor = <-newCursor:
-	case <-logWatcher.WatchClose():
+	case <-logWatcher.WatchConsumerGone():
 		// Notify the other goroutine that its work is done.
 		// Notify the other goroutine that its work is done.
 		C.close(pfd[1])
 		C.close(pfd[1])
 		cursor = <-newCursor
 		cursor = <-newCursor

+ 3 - 2
daemon/logger/jsonfilelog/jsonfilelog.go

@@ -166,13 +166,14 @@ func ValidateLogOpt(cfg map[string]string) error {
 	return nil
 	return nil
 }
 }
 
 
-// Close closes underlying file and signals all readers to stop.
+// Close closes underlying file and signals all the readers
+// that the logs producer is gone.
 func (l *JSONFileLogger) Close() error {
 func (l *JSONFileLogger) Close() error {
 	l.mu.Lock()
 	l.mu.Lock()
 	l.closed = true
 	l.closed = true
 	err := l.writer.Close()
 	err := l.writer.Close()
 	for r := range l.readers {
 	for r := range l.readers {
-		r.Close()
+		r.ProducerGone()
 		delete(l.readers, r)
 		delete(l.readers, r)
 	}
 	}
 	l.mu.Unlock()
 	l.mu.Unlock()

+ 1 - 2
daemon/logger/jsonfilelog/read_test.go

@@ -50,11 +50,10 @@ func BenchmarkJSONFileLoggerReadLogs(b *testing.B) {
 	}()
 	}()
 
 
 	lw := jsonlogger.(*JSONFileLogger).ReadLogs(logger.ReadConfig{Follow: true})
 	lw := jsonlogger.(*JSONFileLogger).ReadLogs(logger.ReadConfig{Follow: true})
-	watchClose := lw.WatchClose()
 	for {
 	for {
 		select {
 		select {
 		case <-lw.Msg:
 		case <-lw.Msg:
-		case <-watchClose:
+		case <-lw.WatchProducerGone():
 			return
 			return
 		case err := <-chError:
 		case err := <-chError:
 			if err != nil {
 			if err != nil {

+ 1 - 1
daemon/logger/local/local.go

@@ -166,7 +166,7 @@ func (d *driver) Close() error {
 	d.closed = true
 	d.closed = true
 	err := d.logfile.Close()
 	err := d.logfile.Close()
 	for r := range d.readers {
 	for r := range d.readers {
-		r.Close()
+		r.ProducerGone()
 		delete(d.readers, r)
 		delete(d.readers, r)
 	}
 	}
 	d.mu.Unlock()
 	d.mu.Unlock()

+ 32 - 15
daemon/logger/logger.go

@@ -104,33 +104,50 @@ type LogWatcher struct {
 	// For sending log messages to a reader.
 	// For sending log messages to a reader.
 	Msg chan *Message
 	Msg chan *Message
 	// For sending error messages that occur while while reading logs.
 	// For sending error messages that occur while while reading logs.
-	Err           chan error
-	closeOnce     sync.Once
-	closeNotifier chan struct{}
+	Err          chan error
+	producerOnce sync.Once
+	producerGone chan struct{}
+	consumerOnce sync.Once
+	consumerGone chan struct{}
 }
 }
 
 
 // NewLogWatcher returns a new LogWatcher.
 // NewLogWatcher returns a new LogWatcher.
 func NewLogWatcher() *LogWatcher {
 func NewLogWatcher() *LogWatcher {
 	return &LogWatcher{
 	return &LogWatcher{
-		Msg:           make(chan *Message, logWatcherBufferSize),
-		Err:           make(chan error, 1),
-		closeNotifier: make(chan struct{}),
+		Msg:          make(chan *Message, logWatcherBufferSize),
+		Err:          make(chan error, 1),
+		producerGone: make(chan struct{}),
+		consumerGone: make(chan struct{}),
 	}
 	}
 }
 }
 
 
-// Close notifies the underlying log reader to stop.
-func (w *LogWatcher) Close() {
+// ProducerGone notifies the underlying log reader that
+// the logs producer (a container) is gone.
+func (w *LogWatcher) ProducerGone() {
 	// only close if not already closed
 	// only close if not already closed
-	w.closeOnce.Do(func() {
-		close(w.closeNotifier)
+	w.producerOnce.Do(func() {
+		close(w.producerGone)
 	})
 	})
 }
 }
 
 
-// WatchClose returns a channel receiver that receives notification
-// when the watcher has been closed. This should only be called from
-// one goroutine.
-func (w *LogWatcher) WatchClose() <-chan struct{} {
-	return w.closeNotifier
+// WatchProducerGone returns a channel receiver that receives notification
+// once the logs producer (a container) is gone.
+func (w *LogWatcher) WatchProducerGone() <-chan struct{} {
+	return w.producerGone
+}
+
+// ConsumerGone notifies that the logs consumer is gone.
+func (w *LogWatcher) ConsumerGone() {
+	// only close if not already closed
+	w.consumerOnce.Do(func() {
+		close(w.consumerGone)
+	})
+}
+
+// WatchConsumerGone returns a channel receiver that receives notification
+// when the log watcher consumer is gone.
+func (w *LogWatcher) WatchConsumerGone() <-chan struct{} {
+	return w.consumerGone
 }
 }
 
 
 // Capability defines the list of capabilities that a driver can implement
 // Capability defines the list of capabilities that a driver can implement

+ 10 - 31
daemon/logger/loggerutils/logfile.go

@@ -488,7 +488,7 @@ func tailFiles(files []SizeReaderAt, watcher *logger.LogWatcher, createDecoder m
 	go func() {
 	go func() {
 		select {
 		select {
 		case <-ctx.Done():
 		case <-ctx.Done():
-		case <-watcher.WatchClose():
+		case <-watcher.WatchConsumerGone():
 			cancel()
 			cancel()
 		}
 		}
 	}()
 	}()
@@ -546,22 +546,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
 	}
 	}
 	defer func() {
 	defer func() {
 		f.Close()
 		f.Close()
-		fileWatcher.Remove(name)
 		fileWatcher.Close()
 		fileWatcher.Close()
 	}()
 	}()
 
 
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-	go func() {
-		select {
-		case <-logWatcher.WatchClose():
-			fileWatcher.Remove(name)
-			cancel()
-		case <-ctx.Done():
-			return
-		}
-	}()
-
 	var retries int
 	var retries int
 	handleRotate := func() error {
 	handleRotate := func() error {
 		f.Close()
 		f.Close()
@@ -596,7 +583,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
 			case fsnotify.Rename, fsnotify.Remove:
 			case fsnotify.Rename, fsnotify.Remove:
 				select {
 				select {
 				case <-notifyRotate:
 				case <-notifyRotate:
-				case <-ctx.Done():
+				case <-logWatcher.WatchProducerGone():
+					return errDone
+				case <-logWatcher.WatchConsumerGone():
 					return errDone
 					return errDone
 				}
 				}
 				if err := handleRotate(); err != nil {
 				if err := handleRotate(); err != nil {
@@ -618,7 +607,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
 				return errRetry
 				return errRetry
 			}
 			}
 			return err
 			return err
-		case <-ctx.Done():
+		case <-logWatcher.WatchProducerGone():
+			return errDone
+		case <-logWatcher.WatchConsumerGone():
 			return errDone
 			return errDone
 		}
 		}
 	}
 	}
@@ -664,23 +655,11 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int
 		if !until.IsZero() && msg.Timestamp.After(until) {
 		if !until.IsZero() && msg.Timestamp.After(until) {
 			return
 			return
 		}
 		}
+		// send the message, unless the consumer is gone
 		select {
 		select {
 		case logWatcher.Msg <- msg:
 		case logWatcher.Msg <- msg:
-		case <-ctx.Done():
-			logWatcher.Msg <- msg
-			for {
-				msg, err := decodeLogLine()
-				if err != nil {
-					return
-				}
-				if !since.IsZero() && msg.Timestamp.Before(since) {
-					continue
-				}
-				if !until.IsZero() && msg.Timestamp.After(until) {
-					return
-				}
-				logWatcher.Msg <- msg
-			}
+		case <-logWatcher.WatchConsumerGone():
+			return
 		}
 		}
 	}
 	}
 }
 }

+ 2 - 2
daemon/logger/loggerutils/logfile_test.go

@@ -77,7 +77,7 @@ func TestTailFiles(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func TestFollowLogsClose(t *testing.T) {
+func TestFollowLogsConsumerGone(t *testing.T) {
 	lw := logger.NewLogWatcher()
 	lw := logger.NewLogWatcher()
 
 
 	f, err := ioutil.TempFile("", t.Name())
 	f, err := ioutil.TempFile("", t.Name())
@@ -110,7 +110,7 @@ func TestFollowLogsClose(t *testing.T) {
 		t.Fatal("timeout waiting for log message")
 		t.Fatal("timeout waiting for log message")
 	}
 	}
 
 
-	lw.Close()
+	lw.ConsumerGone()
 	select {
 	select {
 	case <-followLogsDone:
 	case <-followLogsDone:
 	case <-time.After(20 * time.Second):
 	case <-time.After(20 * time.Second):

+ 2 - 2
daemon/logs.go

@@ -110,8 +110,8 @@ func (daemon *Daemon) ContainerLogs(ctx context.Context, containerName string, c
 				}
 				}
 			}()
 			}()
 		}
 		}
-		// set up some defers
-		defer logs.Close()
+		// signal that the log reader is gone
+		defer logs.ConsumerGone()
 
 
 		// close the messages channel. closing is the only way to signal above
 		// close the messages channel. closing is the only way to signal above
 		// that we're doing with logs (other than context cancel i guess).
 		// that we're doing with logs (other than context cancel i guess).