Merge pull request #43095 from aaronlehmann/avoid-regexp-simple-cases
fileutils: Avoid compiling a regexp for simple patterns
This commit is contained in:
commit
40bb983175
2 changed files with 136 additions and 9 deletions
|
@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern {
|
|||
|
||||
// Pattern defines a single regexp used to filter file paths.
|
||||
type Pattern struct {
|
||||
matchType matchType
|
||||
cleanedPattern string
|
||||
dirs []string
|
||||
regexp *regexp.Regexp
|
||||
exclusion bool
|
||||
}
|
||||
|
||||
type matchType int
|
||||
|
||||
const (
|
||||
unknownMatch matchType = iota
|
||||
exactMatch
|
||||
prefixMatch
|
||||
suffixMatch
|
||||
regexpMatch
|
||||
)
|
||||
|
||||
func (p *Pattern) String() string {
|
||||
return p.cleanedPattern
|
||||
}
|
||||
|
@ -301,18 +312,34 @@ func (p *Pattern) Exclusion() bool {
|
|||
}
|
||||
|
||||
func (p *Pattern) match(path string) (bool, error) {
|
||||
if p.regexp == nil {
|
||||
if err := p.compile(); err != nil {
|
||||
if p.matchType == unknownMatch {
|
||||
if err := p.compile(string(os.PathSeparator)); err != nil {
|
||||
return false, filepath.ErrBadPattern
|
||||
}
|
||||
}
|
||||
|
||||
b := p.regexp.MatchString(path)
|
||||
switch p.matchType {
|
||||
case exactMatch:
|
||||
return path == p.cleanedPattern, nil
|
||||
case prefixMatch:
|
||||
// strip trailing **
|
||||
return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
|
||||
case suffixMatch:
|
||||
// strip leading **
|
||||
suffix := p.cleanedPattern[2:]
|
||||
if strings.HasSuffix(path, suffix) {
|
||||
return true, nil
|
||||
}
|
||||
// **/foo matches "foo"
|
||||
return suffix[0] == os.PathSeparator && path == suffix[1:], nil
|
||||
case regexpMatch:
|
||||
return p.regexp.MatchString(path), nil
|
||||
}
|
||||
|
||||
return b, nil
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (p *Pattern) compile() error {
|
||||
func (p *Pattern) compile(sl string) error {
|
||||
regStr := "^"
|
||||
pattern := p.cleanedPattern
|
||||
// Go through the pattern and convert it to a regexp.
|
||||
|
@ -320,13 +347,13 @@ func (p *Pattern) compile() error {
|
|||
var scan scanner.Scanner
|
||||
scan.Init(strings.NewReader(pattern))
|
||||
|
||||
sl := string(os.PathSeparator)
|
||||
escSL := sl
|
||||
if sl == `\` {
|
||||
escSL += `\`
|
||||
}
|
||||
|
||||
for scan.Peek() != scanner.EOF {
|
||||
p.matchType = exactMatch
|
||||
for i := 0; scan.Peek() != scanner.EOF; i++ {
|
||||
ch := scan.Next()
|
||||
|
||||
if ch == '*' {
|
||||
|
@ -341,20 +368,32 @@ func (p *Pattern) compile() error {
|
|||
|
||||
if scan.Peek() == scanner.EOF {
|
||||
// is "**EOF" - to align with .gitignore just accept all
|
||||
regStr += ".*"
|
||||
if p.matchType == exactMatch {
|
||||
p.matchType = prefixMatch
|
||||
} else {
|
||||
regStr += ".*"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
} else {
|
||||
// is "**"
|
||||
// Note that this allows for any # of /'s (even 0) because
|
||||
// the .* will eat everything, even /'s
|
||||
regStr += "(.*" + escSL + ")?"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
p.matchType = suffixMatch
|
||||
}
|
||||
} else {
|
||||
// is "*" so map it to anything but "/"
|
||||
regStr += "[^" + escSL + "]*"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
} else if ch == '?' {
|
||||
// "?" is any char except "/"
|
||||
regStr += "[^" + escSL + "]"
|
||||
p.matchType = regexpMatch
|
||||
} else if shouldEscape(ch) {
|
||||
// Escape some regexp special chars that have no meaning
|
||||
// in golang's filepath.Match
|
||||
|
@ -371,14 +410,22 @@ func (p *Pattern) compile() error {
|
|||
}
|
||||
if scan.Peek() != scanner.EOF {
|
||||
regStr += `\` + string(scan.Next())
|
||||
p.matchType = regexpMatch
|
||||
} else {
|
||||
regStr += `\`
|
||||
}
|
||||
} else if ch == '[' || ch == ']' {
|
||||
regStr += string(ch)
|
||||
p.matchType = regexpMatch
|
||||
} else {
|
||||
regStr += string(ch)
|
||||
}
|
||||
}
|
||||
|
||||
if p.matchType != regexpMatch {
|
||||
return nil
|
||||
}
|
||||
|
||||
regStr += "$"
|
||||
|
||||
re, err := regexp.Compile(regStr)
|
||||
|
@ -387,6 +434,7 @@ func (p *Pattern) compile() error {
|
|||
}
|
||||
|
||||
p.regexp = re
|
||||
p.matchType = regexpMatch
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -382,10 +382,14 @@ func TestMatches(t *testing.T) {
|
|||
{"a(b)c/def", "a(b)c/def", true},
|
||||
{"a(b)c/def", "a(b)c/xyz", false},
|
||||
{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
|
||||
{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
|
||||
{"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
|
||||
}
|
||||
multiPatternTests := []multiPatternTestCase{
|
||||
{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
|
||||
{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
|
||||
{[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
|
||||
{[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
|
@ -671,7 +675,7 @@ func errp(e error) string {
|
|||
return e.Error()
|
||||
}
|
||||
|
||||
// TestMatch test's our version of filepath.Match, called regexpMatch.
|
||||
// TestMatch tests our version of filepath.Match, called Matches.
|
||||
func TestMatch(t *testing.T) {
|
||||
for _, tt := range matchTests {
|
||||
pattern := tt.pattern
|
||||
|
@ -690,3 +694,78 @@ func TestMatch(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
type compileTestCase struct {
|
||||
pattern string
|
||||
matchType matchType
|
||||
compiledRegexp string
|
||||
windowsCompiledRegexp string
|
||||
}
|
||||
|
||||
var compileTests = []compileTestCase{
|
||||
{"*", regexpMatch, `^[^/]*$`, `^[^\\]*$`},
|
||||
{"file*", regexpMatch, `^file[^/]*$`, `^file[^\\]*$`},
|
||||
{"*file", regexpMatch, `^[^/]*file$`, `^[^\\]*file$`},
|
||||
{"a*/b", regexpMatch, `^a[^/]*/b$`, `^a[^\\]*\\b$`},
|
||||
{"**", suffixMatch, "", ""},
|
||||
{"**/**", regexpMatch, `^(.*/)?.*$`, `^(.*\\)?.*$`},
|
||||
{"dir/**", prefixMatch, "", ""},
|
||||
{"**/dir", suffixMatch, "", ""},
|
||||
{"**/dir2/*", regexpMatch, `^(.*/)?dir2/[^/]*$`, `^(.*\\)?dir2\\[^\\]*$`},
|
||||
{"**/dir2/**", regexpMatch, `^(.*/)?dir2/.*$`, `^(.*\\)?dir2\\.*$`},
|
||||
{"**file", suffixMatch, "", ""},
|
||||
{"**/file*txt", regexpMatch, `^(.*/)?file[^/]*txt$`, `^(.*\\)?file[^\\]*txt$`},
|
||||
{"**/**/*.txt", regexpMatch, `^(.*/)?(.*/)?[^/]*\.txt$`, `^(.*\\)?(.*\\)?[^\\]*\.txt$`},
|
||||
{"a[b-d]e", regexpMatch, `^a[b-d]e$`, `^a[b-d]e$`},
|
||||
{".*", regexpMatch, `^\.[^/]*$`, `^\.[^\\]*$`},
|
||||
{"abc.def", exactMatch, "", ""},
|
||||
{"abc?def", regexpMatch, `^abc[^/]def$`, `^abc[^\\]def$`},
|
||||
{"**/foo/bar", suffixMatch, "", ""},
|
||||
{"a(b)c/def", exactMatch, "", ""},
|
||||
{"a.|)$(}+{bc", exactMatch, "", ""},
|
||||
{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", exactMatch, "", ""},
|
||||
}
|
||||
|
||||
// TestCompile confirms that "compile" assigns the correct match type to a
|
||||
// variety of test case patterns. If the match type is regexp, it also confirms
|
||||
// that the compiled regexp matches the expected regexp.
|
||||
func TestCompile(t *testing.T) {
|
||||
t.Run("slash", testCompile("/"))
|
||||
t.Run("backslash", testCompile(`\`))
|
||||
}
|
||||
|
||||
func testCompile(sl string) func(*testing.T) {
|
||||
return func(t *testing.T) {
|
||||
for _, tt := range compileTests {
|
||||
// Avoid NewPatternMatcher, which has platform-specific behavior
|
||||
pm := &PatternMatcher{
|
||||
patterns: make([]*Pattern, 1),
|
||||
}
|
||||
pattern := path.Clean(tt.pattern)
|
||||
if sl != "/" {
|
||||
pattern = strings.ReplaceAll(pattern, "/", sl)
|
||||
}
|
||||
newp := &Pattern{}
|
||||
newp.cleanedPattern = pattern
|
||||
newp.dirs = strings.Split(pattern, sl)
|
||||
pm.patterns[0] = newp
|
||||
|
||||
if err := pm.patterns[0].compile(sl); err != nil {
|
||||
t.Fatalf("Failed to compile pattern %q: %v", pattern, err)
|
||||
}
|
||||
if pm.patterns[0].matchType != tt.matchType {
|
||||
t.Errorf("pattern %q: matchType = %v, want %v", pattern, pm.patterns[0].matchType, tt.matchType)
|
||||
continue
|
||||
}
|
||||
if tt.matchType == regexpMatch {
|
||||
if sl == `\` {
|
||||
if pm.patterns[0].regexp.String() != tt.windowsCompiledRegexp {
|
||||
t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.windowsCompiledRegexp)
|
||||
}
|
||||
} else if pm.patterns[0].regexp.String() != tt.compiledRegexp {
|
||||
t.Errorf("pattern %q: regexp = %s, want %s", pattern, pm.patterns[0].regexp, tt.compiledRegexp)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue