fileutils: Avoid compiling a regexp for simple patterns

If we detect that a pattern is either an exact match, prefix match, or
suffix match, use an optimized code path instead of compiling a regexp.

Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
This commit is contained in:
Aaron Lehmann 2021-12-20 16:32:13 -08:00
parent 088afc99e4
commit dd66dcad9c
2 changed files with 58 additions and 5 deletions

View file

@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern {
// Pattern defines a single regexp used to filter file paths.
type Pattern struct {
matchType matchType
cleanedPattern string
dirs []string
regexp *regexp.Regexp
exclusion bool
}
type matchType int
const (
unknownMatch matchType = iota
exactMatch
prefixMatch
suffixMatch
regexpMatch
)
func (p *Pattern) String() string {
return p.cleanedPattern
}
@ -301,15 +312,31 @@ func (p *Pattern) Exclusion() bool {
}
func (p *Pattern) match(path string) (bool, error) {
if p.regexp == nil {
if p.matchType == unknownMatch {
if err := p.compile(); err != nil {
return false, filepath.ErrBadPattern
}
}
b := p.regexp.MatchString(path)
switch p.matchType {
case exactMatch:
return path == p.cleanedPattern, nil
case prefixMatch:
// strip trailing **
return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
case suffixMatch:
// strip leading **
suffix := p.cleanedPattern[2:]
if strings.HasSuffix(path, suffix) {
return true, nil
}
// **/foo matches "foo"
return suffix[0] == os.PathSeparator && path == suffix[1:], nil
case regexpMatch:
return p.regexp.MatchString(path), nil
}
return b, nil
return false, nil
}
func (p *Pattern) compile() error {
@ -326,7 +353,8 @@ func (p *Pattern) compile() error {
escSL += `\`
}
for scan.Peek() != scanner.EOF {
p.matchType = exactMatch
for i := 0; scan.Peek() != scanner.EOF; i++ {
ch := scan.Next()
if ch == '*' {
@ -341,20 +369,32 @@ func (p *Pattern) compile() error {
if scan.Peek() == scanner.EOF {
// is "**EOF" - to align with .gitignore just accept all
regStr += ".*"
if p.matchType == exactMatch {
p.matchType = prefixMatch
} else {
regStr += ".*"
p.matchType = regexpMatch
}
} else {
// is "**"
// Note that this allows for any # of /'s (even 0) because
// the .* will eat everything, even /'s
regStr += "(.*" + escSL + ")?"
p.matchType = regexpMatch
}
if i == 0 {
p.matchType = suffixMatch
}
} else {
// is "*" so map it to anything but "/"
regStr += "[^" + escSL + "]*"
p.matchType = regexpMatch
}
} else if ch == '?' {
// "?" is any char except "/"
regStr += "[^" + escSL + "]"
p.matchType = regexpMatch
} else if shouldEscape(ch) {
// Escape some regexp special chars that have no meaning
// in golang's filepath.Match
@ -371,14 +411,22 @@ func (p *Pattern) compile() error {
}
if scan.Peek() != scanner.EOF {
regStr += `\` + string(scan.Next())
p.matchType = regexpMatch
} else {
regStr += `\`
}
} else if ch == '[' || ch == ']' {
regStr += string(ch)
p.matchType = regexpMatch
} else {
regStr += string(ch)
}
}
if p.matchType != regexpMatch {
return nil
}
regStr += "$"
re, err := regexp.Compile(regStr)
@ -387,6 +435,7 @@ func (p *Pattern) compile() error {
}
p.regexp = re
p.matchType = regexpMatch
return nil
}

View file

@ -382,10 +382,14 @@ func TestMatches(t *testing.T) {
{"a(b)c/def", "a(b)c/def", true},
{"a(b)c/def", "a(b)c/xyz", false},
{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
{"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
}
multiPatternTests := []multiPatternTestCase{
{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
{[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
{[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
}
if runtime.GOOS != "windows" {