fileutils: Avoid compiling a regexp for simple patterns
If we detect that a pattern is either an exact match, prefix match, or suffix match, use an optimized code path instead of compiling a regexp. Signed-off-by: Aaron Lehmann <alehmann@netflix.com>
This commit is contained in:
parent
088afc99e4
commit
dd66dcad9c
2 changed files with 58 additions and 5 deletions
|
@ -285,12 +285,23 @@ func (pm *PatternMatcher) Patterns() []*Pattern {
|
|||
|
||||
// Pattern describes a single pattern used to filter file paths. A pattern is
// not necessarily backed by a regexp: matchType records which strategy match
// uses, and compile assigns regexp only when the pattern needs one.
|
||||
type Pattern struct {
|
||||
matchType matchType // matching strategy; match calls compile while this is still unknownMatch
|
||||
cleanedPattern string // pattern text returned by String and compared directly for exact/prefix/suffix matches
|
||||
dirs []string
|
||||
regexp *regexp.Regexp // assigned by compile; left unset when compile returns early for a non-regexp match type
|
||||
exclusion bool // presumably the value reported by Exclusion() — its body is not visible here, confirm
|
||||
}
|
||||
|
||||
// matchType identifies the strategy match uses to test a path against a Pattern.
type matchType int
|
||||
|
||||
const (
|
||||
unknownMatch matchType = iota // zero value: pattern not yet classified, so match calls compile first
|
||||
exactMatch // path must equal the cleaned pattern
|
||||
prefixMatch // pattern ends in "**": path must start with the text before it
|
||||
suffixMatch // pattern starts with "**": path must end with the text after it
|
||||
regexpMatch // path is tested against the regexp built by compile
|
||||
)
|
||||
|
||||
// String returns the cleaned pattern text stored in the Pattern.
func (p *Pattern) String() string {
|
||||
return p.cleanedPattern
|
||||
}
|
||||
|
@ -301,15 +312,31 @@ func (p *Pattern) Exclusion() bool {
|
|||
}
|
||||
|
||||
// match reports whether path matches the pattern. When matchType is still
// unknownMatch it first calls compile to classify the pattern, and reports
// filepath.ErrBadPattern if that fails. Exact, prefix, and suffix patterns are
// answered with plain string comparisons; only regexpMatch consults p.regexp.
//
// NOTE(review): this hunk is rendered without +/- markers, so pre-change and
// post-change lines appear together. The `p.regexp == nil` guard, the
// `b := p.regexp.MatchString(path)` assignment, and `return b, nil` look like
// the removed lines — confirm against the raw diff.
func (p *Pattern) match(path string) (bool, error) {
|
||||
if p.regexp == nil {
|
||||
if p.matchType == unknownMatch {
|
||||
if err := p.compile(); err != nil {
|
||||
return false, filepath.ErrBadPattern // the error from compile is replaced, not wrapped
|
||||
}
|
||||
}
|
||||
|
||||
b := p.regexp.MatchString(path)
|
||||
switch p.matchType {
|
||||
case exactMatch:
|
||||
return path == p.cleanedPattern, nil
|
||||
case prefixMatch:
|
||||
// strip the trailing "**"; what precedes it must be a prefix of path
|
||||
return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
|
||||
case suffixMatch:
|
||||
// strip the leading "**"; the remainder must be a suffix of path
|
||||
suffix := p.cleanedPattern[2:]
|
||||
if strings.HasSuffix(path, suffix) {
|
||||
return true, nil
|
||||
}
|
||||
// "**/foo" also matches a bare "foo": accept a path equal to the
|
||||
// suffix without its leading path separator
|
||||
return suffix[0] == os.PathSeparator && path == suffix[1:], nil
|
||||
case regexpMatch:
|
||||
return p.regexp.MatchString(path), nil
|
||||
}
|
||||
|
||||
return b, nil
|
||||
return false, nil // reached only when matchType is none of the cases above
|
||||
}
|
||||
|
||||
func (p *Pattern) compile() error {
|
||||
|
@ -326,7 +353,8 @@ func (p *Pattern) compile() error {
|
|||
escSL += `\`
|
||||
}
|
||||
|
||||
for scan.Peek() != scanner.EOF {
|
||||
p.matchType = exactMatch
|
||||
for i := 0; scan.Peek() != scanner.EOF; i++ {
|
||||
ch := scan.Next()
|
||||
|
||||
if ch == '*' {
|
||||
|
@ -341,20 +369,32 @@ func (p *Pattern) compile() error {
|
|||
|
||||
if scan.Peek() == scanner.EOF {
|
||||
// is "**EOF" - to align with .gitignore just accept all
|
||||
regStr += ".*"
|
||||
if p.matchType == exactMatch {
|
||||
p.matchType = prefixMatch
|
||||
} else {
|
||||
regStr += ".*"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
} else {
|
||||
// is "**"
|
||||
// Note that this allows for any # of /'s (even 0) because
|
||||
// the .* will eat everything, even /'s
|
||||
regStr += "(.*" + escSL + ")?"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
|
||||
if i == 0 {
|
||||
p.matchType = suffixMatch
|
||||
}
|
||||
} else {
|
||||
// is "*" so map it to anything but "/"
|
||||
regStr += "[^" + escSL + "]*"
|
||||
p.matchType = regexpMatch
|
||||
}
|
||||
} else if ch == '?' {
|
||||
// "?" is any char except "/"
|
||||
regStr += "[^" + escSL + "]"
|
||||
p.matchType = regexpMatch
|
||||
} else if shouldEscape(ch) {
|
||||
// Escape some regexp special chars that have no meaning
|
||||
// in golang's filepath.Match
|
||||
|
@ -371,14 +411,22 @@ func (p *Pattern) compile() error {
|
|||
}
|
||||
if scan.Peek() != scanner.EOF {
|
||||
regStr += `\` + string(scan.Next())
|
||||
p.matchType = regexpMatch
|
||||
} else {
|
||||
regStr += `\`
|
||||
}
|
||||
} else if ch == '[' || ch == ']' {
|
||||
regStr += string(ch)
|
||||
p.matchType = regexpMatch
|
||||
} else {
|
||||
regStr += string(ch)
|
||||
}
|
||||
}
|
||||
|
||||
if p.matchType != regexpMatch {
|
||||
return nil
|
||||
}
|
||||
|
||||
regStr += "$"
|
||||
|
||||
re, err := regexp.Compile(regStr)
|
||||
|
@ -387,6 +435,7 @@ func (p *Pattern) compile() error {
|
|||
}
|
||||
|
||||
p.regexp = re
|
||||
p.matchType = regexpMatch
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -382,10 +382,14 @@ func TestMatches(t *testing.T) {
|
|||
{"a(b)c/def", "a(b)c/def", true},
|
||||
{"a(b)c/def", "a(b)c/xyz", false},
|
||||
{"a.|)$(}+{bc", "a.|)$(}+{bc", true},
|
||||
{"dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
|
||||
{"dist/*.whl", "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", true},
|
||||
}
|
||||
multiPatternTests := []multiPatternTestCase{
|
||||
{[]string{"**", "!util/docker/web"}, "util/docker/web/foo", false},
|
||||
{[]string{"**", "!util/docker/web", "util/docker/web/foo"}, "util/docker/web/foo", true},
|
||||
{[]string{"**", "!dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
|
||||
{[]string{"**", "!dist/*.whl"}, "dist/proxy.py-2.4.0rc3.dev36+g08acad9-py3-none-any.whl", false},
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
|
|
Loading…
Reference in a new issue