Jelajahi Sumber

pkg/fileutils: Track incremental pattern match results against each pattern

The existing code does not correctly handle the case where a file
matches one of the patterns, but should not match overall because of an
exclude pattern that applied to a parent directory (see
https://github.com/docker/buildx/issues/850).

Fix this by independently tracking the results of matching against each
pattern. A file should be considered to match any pattern that matched a
parent dir.

Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
Aaron Lehmann 3 tahun lalu
induk
melakukan
4555d3aa54
3 mengubah file dengan 133 tambahan dan 7 penghapusan
  1. 8 7
      pkg/archive/archive.go
  2. 75 0
      pkg/fileutils/fileutils.go
  3. 50 0
      pkg/fileutils/fileutils_test.go

+ 8 - 7
pkg/archive/archive.go

@@ -866,8 +866,8 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
 			rebaseName := options.RebaseNames[include]
 
 			var (
-				parentMatched []bool
-				parentDirs    []string
+				parentMatchInfo []fileutils.MatchInfo
+				parentDirs      []string
 			)
 
 			walkRoot := getWalkRoot(srcPath, include)
@@ -902,13 +902,14 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
 							break
 						}
 						parentDirs = parentDirs[:len(parentDirs)-1]
-						parentMatched = parentMatched[:len(parentMatched)-1]
+						parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
 					}
 
-					if len(parentMatched) != 0 {
-						skip, err = pm.MatchesUsingParentResult(relFilePath, parentMatched[len(parentMatched)-1])
+					var matchInfo fileutils.MatchInfo
+					if len(parentMatchInfo) != 0 {
+						skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
 					} else {
-						skip, err = pm.MatchesOrParentMatches(relFilePath)
+						skip, matchInfo, err = pm.MatchesUsingParentResults(relFilePath, fileutils.MatchInfo{})
 					}
 					if err != nil {
 						logrus.Errorf("Error matching %s: %v", relFilePath, err)
@@ -917,7 +918,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error)
 
 					if f.IsDir() {
 						parentDirs = append(parentDirs, relFilePath)
-						parentMatched = append(parentMatched, skip)
+						parentMatchInfo = append(parentMatchInfo, matchInfo)
 					}
 				}
 

+ 75 - 0
pkg/fileutils/fileutils.go

@@ -172,6 +172,9 @@ func (pm *PatternMatcher) MatchesOrParentMatches(file string) (bool, error) {
 // The "file" argument should be a slash-delimited path.
 //
 // MatchesUsingParentResult is not safe to call concurrently.
+//
+// Deprecated in favor of MatchesUsingParentResults: this function does behave
+// correctly in some cases (see https://github.com/docker/buildx/issues/850).
 func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) {
 	matched := parentMatched
 	file = filepath.FromSlash(file)
@@ -196,6 +199,78 @@ func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bo
 	return matched, nil
 }
 
+// MatchInfo tracks information about parent dir matches while traversing a
+// filesystem.
+type MatchInfo struct {
+	parentMatched []bool
+}
+
+// MatchesUsingParentResults returns true if "file" matches any of the patterns
+// and isn't excluded by any of the subsequent patterns. The functionality is
+// the same as Matches, but as an optimization, the caller passes in
+// intermediate results from matching the parent directory.
+//
+// The "file" argument should be a slash-delimited path.
+//
+// MatchesUsingParentResults is not safe to call concurrently.
+func (pm *PatternMatcher) MatchesUsingParentResults(file string, parentMatchInfo MatchInfo) (bool, MatchInfo, error) {
+	parentMatched := parentMatchInfo.parentMatched
+	if len(parentMatched) != 0 && len(parentMatched) != len(pm.patterns) {
+		return false, MatchInfo{}, errors.New("wrong number of values in parentMatched")
+	}
+
+	file = filepath.FromSlash(file)
+	matched := false
+
+	matchInfo := MatchInfo{
+		parentMatched: make([]bool, len(pm.patterns)),
+	}
+	for i, pattern := range pm.patterns {
+		match := false
+		// If the parent matched this pattern, we don't need to recheck.
+		if len(parentMatched) != 0 {
+			match = parentMatched[i]
+		}
+
+		if !match {
+			// Skip evaluation if this is an inclusion and the filename
+			// already matched the pattern, or it's an exclusion and it has
+			// not matched the pattern yet.
+			if pattern.exclusion != matched {
+				continue
+			}
+
+			var err error
+			match, err = pattern.match(file)
+			if err != nil {
+				return false, matchInfo, err
+			}
+
+			// If the zero value of MatchInfo was passed in, we don't have
+			// any information about the parent dir's match results, and we
+			// apply the same logic as MatchesOrParentMatches.
+			if len(parentMatched) == 0 {
+				if parentPath := filepath.Dir(file); parentPath != "." {
+					parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
+					// Check to see if the pattern matches one of our parent dirs.
+					for i := range parentPathDirs {
+						match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
+						if match {
+							break
+						}
+					}
+				}
+			}
+		}
+		matchInfo.parentMatched[i] = match
+
+		if match {
+			matched = !pattern.exclusion
+		}
+	}
+	return matched, matchInfo, nil
+}
+
 // Exclusions returns true if any of the patterns define exclusions
 func (pm *PatternMatcher) Exclusions() bool {
 	return pm.exclusions

+ 50 - 0
pkg/fileutils/fileutils_test.go

@@ -309,6 +309,12 @@ type matchesTestCase struct {
 	pass    bool
 }
 
+type multiPatternTestCase struct {
+	patterns []string
+	text     string
+	pass     bool
+}
+
 func TestMatches(t *testing.T) {
 	tests := []matchesTestCase{
 		{"**", "file", true},
@@ -377,6 +383,10 @@ func TestMatches(t *testing.T) {
 		{"a(b)c/def", "a(b)c/xyz", false},
 		{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
 	}
+	multiPatternTests := []multiPatternTestCase{
+		{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
+		{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
+	}
 
 	if runtime.GOOS != "windows" {
 		tests = append(tests, []matchesTestCase{
@@ -392,6 +402,14 @@ func TestMatches(t *testing.T) {
 			res, _ := pm.MatchesOrParentMatches(test.text)
 			assert.Check(t, is.Equal(test.pass, res), desc)
 		}
+
+		for _, test := range multiPatternTests {
+			desc := fmt.Sprintf("patterns=%q text=%q", test.patterns, test.text)
+			pm, err := NewPatternMatcher(test.patterns)
+			assert.NilError(t, err, desc)
+			res, _ := pm.MatchesOrParentMatches(test.text)
+			assert.Check(t, is.Equal(test.pass, res), desc)
+		}
 	})
 
 	t.Run("MatchesUsingParentResult", func(t *testing.T) {
@@ -415,6 +433,38 @@ func TestMatches(t *testing.T) {
 		}
 	})
 
+	t.Run("MatchesUsingParentResults", func(t *testing.T) {
+		check := func(pm *PatternMatcher, text string, pass bool, desc string) {
+			parentPath := filepath.Dir(filepath.FromSlash(text))
+			parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
+
+			parentMatchInfo := MatchInfo{}
+			if parentPath != "." {
+				for i := range parentPathDirs {
+					_, parentMatchInfo, _ = pm.MatchesUsingParentResults(strings.Join(parentPathDirs[:i+1], "/"), parentMatchInfo)
+				}
+			}
+
+			res, _, _ := pm.MatchesUsingParentResults(text, parentMatchInfo)
+			assert.Check(t, is.Equal(pass, res), desc)
+		}
+
+		for _, test := range tests {
+			desc := fmt.Sprintf("pattern=%q text=%q", test.pattern, test.text)
+			pm, err := NewPatternMatcher([]string{test.pattern})
+			assert.NilError(t, err, desc)
+
+			check(pm, test.text, test.pass, desc)
+		}
+
+		for _, test := range multiPatternTests {
+			desc := fmt.Sprintf("pattern=%q text=%q", test.patterns, test.text)
+			pm, err := NewPatternMatcher(test.patterns)
+			assert.NilError(t, err, desc)
+
+			check(pm, test.text, test.pass, desc)
+		}
+	})
 }
 
 func TestCleanPatterns(t *testing.T) {