From 97921915a801dd82b1f5a70e0a69353539c1e3ae Mon Sep 17 00:00:00 2001
From: Cory Snider <csnider@mirantis.com>
Date: Fri, 3 Feb 2023 13:43:22 -0500
Subject: [PATCH] distribution/xfer: fix download fencepost bug

maxDownloadAttempts maps to the daemon configuration flag

    --max-download-attempts int
      Set the max download attempts for each pull (default 5)

and the daemon configuration machinery interprets a value of 0 as "apply
the default value" and not a valid user value (config validation/
normalization bugs notwithstanding). The intention is clearly that this
configuration value should be an upper limit on the number of times the
daemon should try to download a particular layer before giving up. So it
is surprising to have the configuration value interpreted as a _retry_
limit. The daemon will make up to N+1 attempts to download a layer! This
also means users cannot disable retries even if they wanted to.

Fix the fencepost bug so that max attempts really means max attempts,
not max retries. And fix the fencepost bug with the retry-backoff delay
so that the first backoff is 5s, not 10s.

Signed-off-by: Cory Snider <csnider@mirantis.com>
---
 distribution/xfer/download.go      | 12 ++++++------
 distribution/xfer/download_test.go | 22 +++++++++++-----------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/distribution/xfer/download.go b/distribution/xfer/download.go
index 32f2338e96..e77c564513 100644
--- a/distribution/xfer/download.go
+++ b/distribution/xfer/download.go
@@ -267,7 +267,7 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor,
 				downloadReader io.ReadCloser
 				size           int64
 				err            error
-				retries        int
+				attempt        int = 1
 			)
 
 			defer descriptor.Close()
@@ -287,16 +287,16 @@ func (ldm *LayerDownloadManager) makeDownloadFunc(descriptor DownloadDescriptor,
 				default:
 				}
 
-				retries++
-				if _, isDNR := err.(DoNotRetry); isDNR || retries > ldm.maxDownloadAttempts {
-					logrus.Errorf("Download failed after %d attempts: %v", retries, err)
+				if _, isDNR := err.(DoNotRetry); isDNR || attempt >= ldm.maxDownloadAttempts {
+					logrus.Errorf("Download failed after %d attempts: %v", attempt, err)
 					d.err = err
 					return
 				}
 
-				logrus.Infof("Download failed, retrying (%d/%d): %v", retries, ldm.maxDownloadAttempts, err)
-				delay := retries * 5
+				logrus.Infof("Download failed, retrying (%d/%d): %v", attempt, ldm.maxDownloadAttempts, err)
+				delay := attempt * 5
 				ticker := time.NewTicker(ldm.waitDuration)
+				attempt++
 
 			selectLoop:
 				for {
diff --git a/distribution/xfer/download_test.go b/distribution/xfer/download_test.go
index ced9575fb0..f45598517d 100644
--- a/distribution/xfer/download_test.go
+++ b/distribution/xfer/download_test.go
@@ -216,7 +216,7 @@ func (d *mockDownloadDescriptor) Download(ctx context.Context, progressOutput pr
 
 	if d.retries < d.simulateRetries {
 		d.retries++
-		return nil, 0, fmt.Errorf("simulating download attempt %d/%d", d.retries, d.simulateRetries)
+		return nil, 0, fmt.Errorf("simulating download attempt failure %d/%d", d.retries, d.simulateRetries)
 	}
 
 	return d.mockTarStream(), 0, nil
@@ -367,25 +367,25 @@ func TestMaxDownloadAttempts(t *testing.T) {
 	}{
 		{
 			name:                "max-attempts=5, succeed at 2nd attempt",
-			simulateRetries:     2,
+			simulateRetries:     1,
 			maxDownloadAttempts: 5,
 		},
 		{
 			name:                "max-attempts=5, succeed at 5th attempt",
+			simulateRetries:     4,
+			maxDownloadAttempts: 5,
+		},
+		{
+			name:                "max-attempts=5, fail at 5th attempt",
 			simulateRetries:     5,
 			maxDownloadAttempts: 5,
+			expectedErr:         "simulating download attempt failure 5/5",
 		},
 		{
-			name:                "max-attempts=5, fail at 6th attempt",
-			simulateRetries:     6,
-			maxDownloadAttempts: 5,
-			expectedErr:         "simulating download attempt 5/6",
-		},
-		{
-			name:                "max-attempts=0, fail after 1 attempt",
+			name:                "max-attempts=1, fail after 1 attempt",
 			simulateRetries:     1,
-			maxDownloadAttempts: 0,
-			expectedErr:         "simulating download attempt 1/1",
+			maxDownloadAttempts: 1,
+			expectedErr:         "simulating download attempt failure 1/1",
 		},
 	}
 	for _, tc := range tests {