Ver Fonte

distribution/xfer: fix download fencepost bug

maxDownloadAttempts maps to the daemon configuration flag

    --max-download-attempts int
      Set the max download attempts for each pull (default 5)

and the daemon configuration machinery interprets a value of 0 as "apply
the default value" and not a valid user value (config validation/
normalization bugs notwithstanding). The intention is clearly that this
configuration value should be an upper limit on the number of times the
daemon should try to download a particular layer before giving up. So it
is surprising to have the configuration value interpreted as a _retry_
limit. The daemon will make up to N+1 attempts to download a layer! This
also means users cannot disable retries even if they wanted to.

Fix the fencepost bug so that max attempts really means max attempts,
not max retries. And fix the fencepost bug with the retry-backoff delay
so that the first backoff is 5s, not 10s.

Signed-off-by: Cory Snider <csnider@mirantis.com>
Cory Snider há 2 anos atrás
pai
commit
97921915a8
2 ficheiros alterados com 15 adições e 15 exclusões
  1. 6 6
      distribution/xfer/download.go
  2. 9 9
      distribution/xfer/download_test.go

+ 6 - 6
distribution/xfer/download.go

@@ -267,7 +267,7 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor,
 				downloadReader io.ReadCloser
 				size           int64
 				err            error
-				retries        int
+				attempt        int = 1
 			)
 
 			defer descriptor.Close()
@@ -287,16 +287,16 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor,
 				default:
 				}
 
-				retries++
-				if _, isDNR := err.(DoNotRetry); isDNR || retries > ldm.maxDownloadAttempts {
-					logrus.Errorf("Download failed after %d attempts: %v", retries, err)
+				if _, isDNR := err.(DoNotRetry); isDNR || attempt >= ldm.maxDownloadAttempts {
+					logrus.Errorf("Download failed after %d attempts: %v", attempt, err)
 					d.err = err
 					return
 				}
 
-				logrus.Infof("Download failed, retrying (%d/%d): %v", retries, ldm.maxDownloadAttempts, err)
-				delay := retries * 5
+				logrus.Infof("Download failed, retrying (%d/%d): %v", attempt, ldm.maxDownloadAttempts, err)
+				delay := attempt * 5
 				ticker := time.NewTicker(ldm.waitDuration)
+				attempt++
 
 			selectLoop:
 				for {

+ 9 - 9
distribution/xfer/download_test.go

@@ -216,7 +216,7 @@ func (d *mockDownloadDescriptor) Download(ctx context.Context, progressOutput pr
 
 	if d.retries < d.simulateRetries {
 		d.retries++
-		return nil, 0, fmt.Errorf("simulating download attempt %d/%d", d.retries, d.simulateRetries)
+		return nil, 0, fmt.Errorf("simulating download attempt failure %d/%d", d.retries, d.simulateRetries)
 	}
 
 	return d.mockTarStream(), 0, nil
@@ -367,25 +367,25 @@ func TestMaxDownloadAttempts(t *testing.T) {
 	}{
 		{
 			name:                "max-attempts=5, succeed at 2nd attempt",
-			simulateRetries:     2,
+			simulateRetries:     1,
 			maxDownloadAttempts: 5,
 		},
 		{
 			name:                "max-attempts=5, succeed at 5th attempt",
-			simulateRetries:     5,
+			simulateRetries:     4,
 			maxDownloadAttempts: 5,
 		},
 		{
-			name:                "max-attempts=5, fail at 6th attempt",
-			simulateRetries:     6,
+			name:                "max-attempts=5, fail at 5th attempt",
+			simulateRetries:     5,
 			maxDownloadAttempts: 5,
-			expectedErr:         "simulating download attempt 5/6",
+			expectedErr:         "simulating download attempt failure 5/5",
 		},
 		{
-			name:                "max-attempts=0, fail after 1 attempt",
+			name:                "max-attempts=1, fail after 1 attempt",
 			simulateRetries:     1,
-			maxDownloadAttempts: 0,
-			expectedErr:         "simulating download attempt 1/1",
+			maxDownloadAttempts: 1,
+			expectedErr:         "simulating download attempt failure 1/1",
 		},
 	}
 	for _, tc := range tests {