소스 검색

fileutils: Avoid compiling a regexp for simple patterns

If we detect that a pattern is either an exact match, prefix match, or
suffix match, use an optimized code path instead of compiling a regexp.

Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
Aaron Lehmann 3 년 전
부모
커밋
dd66dcad9c
2개의 변경된 파일, 58개의 추가 그리고 5개의 삭제
  1. 54 5
      pkg/fileutils/fileutils.go
  2. 4 0
      pkg/fileutils/fileutils_test.go

+ 54 - 5
pkg/fileutils/fileutils.go

@@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern {
 
 // Pattern defines a single regexp used to filter file paths.
 type Pattern struct {
+	matchType      matchType
 	cleanedPattern string
 	dirs           []string
 	regexp         *regexp.Regexp
 	exclusion      bool
 }
 
+type matchType int
+
+const (
+	unknownMatch matchType = iota
+	exactMatch
+	prefixMatch
+	suffixMatch
+	regexpMatch
+)
+
 func (p *Pattern) String() string {
 	return p.cleanedPattern
 }
@@ -301,15 +312,31 @@ func (p *Pattern) Exclusion() bool {
 }
 
 func (p *Pattern) match(path string) (bool, error) {
-	if p.regexp == nil {
+	if p.matchType == unknownMatch {
 		if err := p.compile(); err != nil {
 			return false, filepath.ErrBadPattern
 		}
 	}
 
-	b := p.regexp.MatchString(path)
+	switch p.matchType {
+	case exactMatch:
+		return path == p.cleanedPattern, nil
+	case prefixMatch:
+		// strip trailing **
+		return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
+	case suffixMatch:
+		// strip leading **
+		suffix := p.cleanedPattern[2:]
+		if strings.HasSuffix(path, suffix) {
+			return true, nil
+		}
+		// **/foo matches "foo"
+		return suffix[0] == os.PathSeparator && path == suffix[1:], nil
+	case regexpMatch:
+		return p.regexp.MatchString(path), nil
+	}
 
-	return b, nil
+	return false, nil
 }
 
 func (p *Pattern) compile() error {
@@ -326,7 +353,8 @@ func (p *Pattern) compile() error {
 		escSL += `\`
 	}
 
-	for scan.Peek() != scanner.EOF {
+	p.matchType = exactMatch
+	for i := 0; scan.Peek() != scanner.EOF; i++ {
 		ch := scan.Next()
 
 		if ch == '*' {
@@ -341,20 +369,32 @@ func (p *Pattern) compile() error {
 
 				if scan.Peek() == scanner.EOF {
 					// is "**EOF" - to align with .gitignore just accept all
-					regStr += ".*"
+					if p.matchType == exactMatch {
+						p.matchType = prefixMatch
+					} else {
+						regStr += ".*"
+						p.matchType = regexpMatch
+					}
 				} else {
 					// is "**"
 					// Note that this allows for any # of /'s (even 0) because
 					// the .* will eat everything, even /'s
 					regStr += "(.*" + escSL + ")?"
+					p.matchType = regexpMatch
+				}
+
+				if i == 0 {
+					p.matchType = suffixMatch
 				}
 			} else {
 				// is "*" so map it to anything but "/"
 				regStr += "[^" + escSL + "]*"
+				p.matchType = regexpMatch
 			}
 		} else if ch == '?' {
 			// "?" is any char except "/"
 			regStr += "[^" + escSL + "]"
+			p.matchType = regexpMatch
 		} else if shouldEscape(ch) {
 			// Escape some regexp special chars that have no meaning
 			// in golang's filepath.Match
@@ -371,14 +411,22 @@ func (p *Pattern) compile() error {
 			}
 			if scan.Peek() != scanner.EOF {
 				regStr += `\` + string(scan.Next())
+				p.matchType = regexpMatch
 			} else {
 				regStr += `\`
 			}
+		} else if ch == '[' || ch == ']' {
+			regStr += string(ch)
+			p.matchType = regexpMatch
 		} else {
 			regStr += string(ch)
 		}
 	}
 
+	if p.matchType != regexpMatch {
+		return nil
+	}
+
 	regStr += "$"
 
 	re, err := regexp.Compile(regStr)
@@ -387,6 +435,7 @@ func (p *Pattern) compile() error {
 	}
 
 	p.regexp = re
+	p.matchType = regexpMatch
 	return nil
 }
 

+ 4 - 0
pkg/fileutils/fileutils_test.go

@@ -382,10 +382,14 @@ func TestMatches(t *testing.T) {
 		{"a(b)c/def", "a(b)c/def", true},
 		{"a(b)c/def", "a(b)c/xyz", false},
 		{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
+		{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
+		{"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
 	}
 	multiPatternTests := []multiPatternTestCase{
 		{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
 		{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
+		{[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
+		{[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
 	}
 
 	if runtime.GOOS != "windows" {