Procházet zdrojové kódy

Merge pull request #43095 from aaronlehmann/avoid-regexp-simple-cases

fileutils: Avoid compiling a regexp for simple patterns
Aaron Lehmann před 3 roky
rodič
revize
40bb983175
2 změnil soubory, kde provedl 136 přidání a 9 odebrání
  1. 56 8
      pkg/fileutils/fileutils.go
  2. 80 1
      pkg/fileutils/fileutils_test.go

+ 56 - 8
pkg/fileutils/fileutils.go

@@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern {
 
 // Pattern defines a single regexp used to filter file paths.
 type Pattern struct {
+	matchType      matchType
 	cleanedPattern string
 	dirs           []string
 	regexp         *regexp.Regexp
 	exclusion      bool
 }
 
+type matchType int
+
+const (
+	unknownMatch matchType = iota
+	exactMatch
+	prefixMatch
+	suffixMatch
+	regexpMatch
+)
+
 func (p *Pattern) String() string {
 	return p.cleanedPattern
 }
@@ -301,18 +312,34 @@ func (p *Pattern) Exclusion() bool {
 }
 
 func (p *Pattern) match(path string) (bool, error) {
-	if p.regexp == nil {
-		if err := p.compile(); err != nil {
+	if p.matchType == unknownMatch {
+		if err := p.compile(string(os.PathSeparator)); err != nil {
 			return false, filepath.ErrBadPattern
 		}
 	}
 
-	b := p.regexp.MatchString(path)
+	switch p.matchType {
+	case exactMatch:
+		return path == p.cleanedPattern, nil
+	case prefixMatch:
+		// strip trailing **
+		return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
+	case suffixMatch:
+		// strip leading **
+		suffix := p.cleanedPattern[2:]
+		if strings.HasSuffix(path, suffix) {
+			return true, nil
+		}
+		// **/foo matches "foo"
+		return suffix[0] == os.PathSeparator && path == suffix[1:], nil
+	case regexpMatch:
+		return p.regexp.MatchString(path), nil
+	}
 
-	return b, nil
+	return false, nil
 }
 
-func (p *Pattern) compile() error {
+func (p *Pattern) compile(sl string) error {
 	regStr := "^"
 	pattern := p.cleanedPattern
 	// Go through the pattern and convert it to a regexp.
@@ -320,13 +347,13 @@ func (p *Pattern) compile() error {
 	var scan scanner.Scanner
 	scan.Init(strings.NewReader(pattern))
 
-	sl := string(os.PathSeparator)
 	escSL := sl
 	if sl == `\` {
 		escSL += `\`
 	}
 
-	for scan.Peek() != scanner.EOF {
+	p.matchType = exactMatch
+	for i := 0; scan.Peek() != scanner.EOF; i++ {
 		ch := scan.Next()
 
 		if ch == '*' {
@@ -341,20 +368,32 @@ func (p *Pattern) compile() error {
 
 				if scan.Peek() == scanner.EOF {
 					// is "**EOF" - to align with .gitignore just accept all
-					regStr += ".*"
+					if p.matchType == exactMatch {
+						p.matchType = prefixMatch
+					} else {
+						regStr += ".*"
+						p.matchType = regexpMatch
+					}
 				} else {
 					// is "**"
 					// Note that this allows for any # of /'s (even 0) because
 					// the .* will eat everything, even /'s
 					regStr += "(.*" + escSL + ")?"
+					p.matchType = regexpMatch
+				}
+
+				if i == 0 {
+					p.matchType = suffixMatch
 				}
 			} else {
 				// is "*" so map it to anything but "/"
 				regStr += "[^" + escSL + "]*"
+				p.matchType = regexpMatch
 			}
 		} else if ch == '?' {
 			// "?" is any char except "/"
 			regStr += "[^" + escSL + "]"
+			p.matchType = regexpMatch
 		} else if shouldEscape(ch) {
 			// Escape some regexp special chars that have no meaning
 			// in golang's filepath.Match
@@ -371,14 +410,22 @@ func (p *Pattern) compile() error {
 			}
 			if scan.Peek() != scanner.EOF {
 				regStr += `\` + string(scan.Next())
+				p.matchType = regexpMatch
 			} else {
 				regStr += `\`
 			}
+		} else if ch == '[' || ch == ']' {
+			regStr += string(ch)
+			p.matchType = regexpMatch
 		} else {
 			regStr += string(ch)
 		}
 	}
 
+	if p.matchType != regexpMatch {
+		return nil
+	}
+
 	regStr += "$"
 
 	re, err := regexp.Compile(regStr)
@@ -387,6 +434,7 @@ func (p *Pattern) compile() error {
 	}
 
 	p.regexp = re
+	p.matchType = regexpMatch
 	return nil
 }
 

+ 80 - 1
pkg/fileutils/fileutils_test.go

@@ -382,10 +382,14 @@ func TestMatches(t *testing.T) {
 		{"a(b)c/def", "a(b)c/def", true},
 		{"a(b)c/def", "a(b)c/xyz", false},
 		{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
+		{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
+		{"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
 	}
 	multiPatternTests := []multiPatternTestCase{
 		{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
 		{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
+		{[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
+		{[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
 	}
 
 	if runtime.GOOS != "windows" {
@@ -671,7 +675,7 @@ func errp(e error) string {
 	return e.Error()
 }
 
-// TestMatch test's our version of filepath.Match, called regexpMatch.
+// TestMatch tests our version of filepath.Match, called Matches.
 func TestMatch(t *testing.T) {
 	for _, tt := range matchTests {
 		pattern := tt.pattern
@@ -690,3 +694,78 @@ func TestMatch(t *testing.T) {
 		}
 	}
 }
+
+type compileTestCase struct {
+	pattern               string
+	matchType             matchType
+	compiledRegexp        string
+	windowsCompiledRegexp string
+}
+
+var compileTests = []compileTestCase{
+	{"*", regexpMatch, `^[^/]*$`, `^[^\\]*$`},
+	{"file*", regexpMatch, `^file[^/]*$`, `^file[^\\]*$`},
+	{"*file", regexpMatch, `^[^/]*file$`, `^[^\\]*file$`},
+	{"a*/b", regexpMatch, `^a[^/]*/b$`, `^a[^\\]*\\b$`},
+	{"**", suffixMatch, "", ""},
+	{"**/**", regexpMatch, `^(.*/)?.*$`, `^(.*\\)?.*$`},
+	{"dir/**", prefixMatch, "", ""},
+	{"**/dir", suffixMatch, "", ""},
+	{"**/dir2/*", regexpMatch, `^(.*/)?dir2/[^/]*$`, `^(.*\\)?dir2\\[^\\]*$`},
+	{"**/dir2/**", regexpMatch, `^(.*/)?dir2/.*$`, `^(.*\\)?dir2\\.*$`},
+	{"**file", suffixMatch, "", ""},
+	{"**/file*txt", regexpMatch, `^(.*/)?file[^/]*txt$`, `^(.*\\)?file[^\\]*txt$`},
+	{"**/**/*.txt", regexpMatch, `^(.*/)?(.*/)?[^/]*\.txt$`, `^(.*\\)?(.*\\)?[^\\]*\.txt$`},
+	{"a[b-d]e", regexpMatch, `^a[b-d]e$`, `^a[b-d]e$`},
+	{".*", regexpMatch, `^\.[^/]*$`, `^\.[^\\]*$`},
+	{"abc.def", exactMatch, "", ""},
+	{"abc?def", regexpMatch, `^abc[^/]def$`, `^abc[^\\]def$`},
+	{"**/foo/bar", suffixMatch, "", ""},
+	{"a(b)c/def", exactMatch, "", ""},
+	{"a.|)$(}+{bc", exactMatch, "", ""},
+	{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", exactMatch, "", ""},
+}
+
+// TestCompile confirms that "compile" assigns the correct match type to a
+// variety of test case patterns. If the match type is regexp, it also confirms
+// that the compiled regexp matches the expected regexp.
+func TestCompile(t *testing.T) {
+	t.Run("slash", testCompile("/"))
+	t.Run("backslash", testCompile(`\`))
+}
+
+func testCompile(sl string) func(*testing.T) {
+	return func(t *testing.T) {
+		for _, tt := range compileTests {
+			// Avoid NewPatternMatcher, which has platform-specific behavior
+			pm := &PatternMatcher{
+				patterns: make([]*Pattern, 1),
+			}
+			pattern := path.Clean(tt.pattern)
+			if sl != "/" {
+				pattern = strings.ReplaceAll(pattern, "/", sl)
+			}
+			newp := &Pattern{}
+			newp.cleanedPattern = pattern
+			newp.dirs = strings.Split(pattern, sl)
+			pm.patterns[0] = newp
+
+			if err := pm.patterns[0].compile(sl); err != nil {
+				t.Fatalf("Failed to compile pattern %q: %v", pattern, err)
+			}
+			if pm.patterns[0].matchType != tt.matchType {
+				t.Errorf("pattern %q: matchType = %v, want %v", pattern, pm.patterns[0].matchType, tt.matchType)
+				continue
+			}
+			if tt.matchType == regexpMatch {
+				if sl == `\` {
+					if pm.patterns[0].regexp.String() != tt.windowsCompiledRegexp {
+						t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.windowsCompiledRegexp)
+					}
+				} else if pm.patterns[0].regexp.String() != tt.compiledRegexp {
+					t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.compiledRegexp)
+				}
+			}
+		}
+	}
+}