fileutils.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. package fileutils // import "github.com/docker/docker/pkg/fileutils"
  2. import (
  3. "errors"
  4. "fmt"
  5. "io"
  6. "os"
  7. "path/filepath"
  8. "regexp"
  9. "strings"
  10. "text/scanner"
  11. "unicode/utf8"
  12. )
  13. // escapeBytes is a bitmap used to check whether a character should be escaped when creating the regex.
  14. var escapeBytes [8]byte
  15. // shouldEscape reports whether a rune should be escaped as part of the regex.
  16. //
  17. // This only includes characters that require escaping in regex but are also NOT valid filepath pattern characters.
  18. // Additionally, '\' is not excluded because there is specific logic to properly handle this, as it's a path separator
  19. // on Windows.
  20. //
  21. // Adapted from regexp::QuoteMeta in go stdlib.
  22. // See https://cs.opensource.google/go/go/+/refs/tags/go1.17.2:src/regexp/regexp.go;l=703-715;drc=refs%2Ftags%2Fgo1.17.2
  23. func shouldEscape(b rune) bool {
  24. return b < utf8.RuneSelf && escapeBytes[b%8]&(1<<(b/8)) != 0
  25. }
  26. func init() {
  27. for _, b := range []byte(`.+()|{}$`) {
  28. escapeBytes[b%8] |= 1 << (b / 8)
  29. }
  30. }
// PatternMatcher allows checking paths against a list of patterns
type PatternMatcher struct {
	// patterns holds the parsed patterns in the order they were supplied to
	// NewPatternMatcher; the matching methods walk them in this order.
	patterns []*Pattern
	// exclusions is set when at least one supplied pattern started with '!'.
	exclusions bool
}
  36. // NewPatternMatcher creates a new matcher object for specific patterns that can
  37. // be used later to match against patterns against paths
  38. func NewPatternMatcher(patterns []string) (*PatternMatcher, error) {
  39. pm := &PatternMatcher{
  40. patterns: make([]*Pattern, 0, len(patterns)),
  41. }
  42. for _, p := range patterns {
  43. // Eliminate leading and trailing whitespace.
  44. p = strings.TrimSpace(p)
  45. if p == "" {
  46. continue
  47. }
  48. p = filepath.Clean(p)
  49. newp := &Pattern{}
  50. if p[0] == '!' {
  51. if len(p) == 1 {
  52. return nil, errors.New("illegal exclusion pattern: \"!\"")
  53. }
  54. newp.exclusion = true
  55. p = p[1:]
  56. pm.exclusions = true
  57. }
  58. // Do some syntax checking on the pattern.
  59. // filepath's Match() has some really weird rules that are inconsistent
  60. // so instead of trying to dup their logic, just call Match() for its
  61. // error state and if there is an error in the pattern return it.
  62. // If this becomes an issue we can remove this since its really only
  63. // needed in the error (syntax) case - which isn't really critical.
  64. if _, err := filepath.Match(p, "."); err != nil {
  65. return nil, err
  66. }
  67. newp.cleanedPattern = p
  68. newp.dirs = strings.Split(p, string(os.PathSeparator))
  69. pm.patterns = append(pm.patterns, newp)
  70. }
  71. return pm, nil
  72. }
  73. // Matches returns true if "file" matches any of the patterns
  74. // and isn't excluded by any of the subsequent patterns.
  75. //
  76. // The "file" argument should be a slash-delimited path.
  77. //
  78. // Matches is not safe to call concurrently.
  79. //
  80. // This implementation is buggy (it only checks a single parent dir against the
  81. // pattern) and will be removed soon. Use either MatchesOrParentMatches or
  82. // MatchesUsingParentResult instead.
  83. func (pm *PatternMatcher) Matches(file string) (bool, error) {
  84. matched := false
  85. file = filepath.FromSlash(file)
  86. parentPath := filepath.Dir(file)
  87. parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
  88. for _, pattern := range pm.patterns {
  89. // Skip evaluation if this is an inclusion and the filename
  90. // already matched the pattern, or it's an exclusion and it has
  91. // not matched the pattern yet.
  92. if pattern.exclusion != matched {
  93. continue
  94. }
  95. match, err := pattern.match(file)
  96. if err != nil {
  97. return false, err
  98. }
  99. if !match && parentPath != "." {
  100. // Check to see if the pattern matches one of our parent dirs.
  101. if len(pattern.dirs) <= len(parentPathDirs) {
  102. match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator)))
  103. }
  104. }
  105. if match {
  106. matched = !pattern.exclusion
  107. }
  108. }
  109. return matched, nil
  110. }
  111. // MatchesOrParentMatches returns true if "file" matches any of the patterns
  112. // and isn't excluded by any of the subsequent patterns.
  113. //
  114. // The "file" argument should be a slash-delimited path.
  115. //
  116. // Matches is not safe to call concurrently.
  117. func (pm *PatternMatcher) MatchesOrParentMatches(file string) (bool, error) {
  118. matched := false
  119. file = filepath.FromSlash(file)
  120. parentPath := filepath.Dir(file)
  121. parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
  122. for _, pattern := range pm.patterns {
  123. // Skip evaluation if this is an inclusion and the filename
  124. // already matched the pattern, or it's an exclusion and it has
  125. // not matched the pattern yet.
  126. if pattern.exclusion != matched {
  127. continue
  128. }
  129. match, err := pattern.match(file)
  130. if err != nil {
  131. return false, err
  132. }
  133. if !match && parentPath != "." {
  134. // Check to see if the pattern matches one of our parent dirs.
  135. for i := range parentPathDirs {
  136. match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
  137. if match {
  138. break
  139. }
  140. }
  141. }
  142. if match {
  143. matched = !pattern.exclusion
  144. }
  145. }
  146. return matched, nil
  147. }
  148. // MatchesUsingParentResult returns true if "file" matches any of the patterns
  149. // and isn't excluded by any of the subsequent patterns. The functionality is
  150. // the same as Matches, but as an optimization, the caller keeps track of
  151. // whether the parent directory matched.
  152. //
  153. // The "file" argument should be a slash-delimited path.
  154. //
  155. // MatchesUsingParentResult is not safe to call concurrently.
  156. //
  157. // Deprecated in favor of MatchesUsingParentResults: this function does behave
  158. // correctly in some cases (see https://github.com/docker/buildx/issues/850).
  159. func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) {
  160. matched := parentMatched
  161. file = filepath.FromSlash(file)
  162. for _, pattern := range pm.patterns {
  163. // Skip evaluation if this is an inclusion and the filename
  164. // already matched the pattern, or it's an exclusion and it has
  165. // not matched the pattern yet.
  166. if pattern.exclusion != matched {
  167. continue
  168. }
  169. match, err := pattern.match(file)
  170. if err != nil {
  171. return false, err
  172. }
  173. if match {
  174. matched = !pattern.exclusion
  175. }
  176. }
  177. return matched, nil
  178. }
// MatchInfo tracks information about parent dir matches while traversing a
// filesystem.
type MatchInfo struct {
	// parentMatched records, per pattern index, whether that pattern matched.
	// MatchesUsingParentResults fills it for the path it was called with and
	// accepts it back as the parent's result for paths below it. The zero
	// value (empty slice) means no parent information is available.
	parentMatched []bool
}
  184. // MatchesUsingParentResults returns true if "file" matches any of the patterns
  185. // and isn't excluded by any of the subsequent patterns. The functionality is
  186. // the same as Matches, but as an optimization, the caller passes in
  187. // intermediate results from matching the parent directory.
  188. //
  189. // The "file" argument should be a slash-delimited path.
  190. //
  191. // MatchesUsingParentResults is not safe to call concurrently.
  192. func (pm *PatternMatcher) MatchesUsingParentResults(file string, parentMatchInfo MatchInfo) (bool, MatchInfo, error) {
  193. parentMatched := parentMatchInfo.parentMatched
  194. if len(parentMatched) != 0 && len(parentMatched) != len(pm.patterns) {
  195. return false, MatchInfo{}, errors.New("wrong number of values in parentMatched")
  196. }
  197. file = filepath.FromSlash(file)
  198. matched := false
  199. matchInfo := MatchInfo{
  200. parentMatched: make([]bool, len(pm.patterns)),
  201. }
  202. for i, pattern := range pm.patterns {
  203. match := false
  204. // If the parent matched this pattern, we don't need to recheck.
  205. if len(parentMatched) != 0 {
  206. match = parentMatched[i]
  207. }
  208. if !match {
  209. // Skip evaluation if this is an inclusion and the filename
  210. // already matched the pattern, or it's an exclusion and it has
  211. // not matched the pattern yet.
  212. if pattern.exclusion != matched {
  213. continue
  214. }
  215. var err error
  216. match, err = pattern.match(file)
  217. if err != nil {
  218. return false, matchInfo, err
  219. }
  220. // If the zero value of MatchInfo was passed in, we don't have
  221. // any information about the parent dir's match results, and we
  222. // apply the same logic as MatchesOrParentMatches.
  223. if len(parentMatched) == 0 {
  224. if parentPath := filepath.Dir(file); parentPath != "." {
  225. parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
  226. // Check to see if the pattern matches one of our parent dirs.
  227. for i := range parentPathDirs {
  228. match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
  229. if match {
  230. break
  231. }
  232. }
  233. }
  234. }
  235. }
  236. matchInfo.parentMatched[i] = match
  237. if match {
  238. matched = !pattern.exclusion
  239. }
  240. }
  241. return matched, matchInfo, nil
  242. }
// Exclusions returns true if any of the patterns define exclusions,
// i.e. at least one pattern given to NewPatternMatcher started with '!'.
func (pm *PatternMatcher) Exclusions() bool {
	return pm.exclusions
}
// Patterns returns the active patterns in the order they are evaluated.
//
// The returned slice is the matcher's own slice, not a copy.
func (pm *PatternMatcher) Patterns() []*Pattern {
	return pm.patterns
}
// Pattern defines a single regexp used to filter file paths.
type Pattern struct {
	// cleanedPattern is the pattern text as stored by NewPatternMatcher:
	// trimmed, passed through filepath.Clean, and without a leading '!'.
	cleanedPattern string
	// dirs is cleanedPattern split on the OS path separator.
	dirs []string
	// regexp is the compiled form of cleanedPattern. It stays nil until the
	// first call to match, which compiles it on demand.
	regexp *regexp.Regexp
	// exclusion is true when the original pattern started with '!'.
	exclusion bool
}
// String returns the cleaned pattern text. For exclusion patterns this does
// not include the leading '!'.
func (p *Pattern) String() string {
	return p.cleanedPattern
}
// Exclusion returns true if this pattern defines exclusion, i.e. it was
// written with a leading '!'.
func (p *Pattern) Exclusion() bool {
	return p.exclusion
}
  265. func (p *Pattern) match(path string) (bool, error) {
  266. if p.regexp == nil {
  267. if err := p.compile(); err != nil {
  268. return false, filepath.ErrBadPattern
  269. }
  270. }
  271. b := p.regexp.MatchString(path)
  272. return b, nil
  273. }
  274. func (p *Pattern) compile() error {
  275. regStr := "^"
  276. pattern := p.cleanedPattern
  277. // Go through the pattern and convert it to a regexp.
  278. // We use a scanner so we can support utf-8 chars.
  279. var scan scanner.Scanner
  280. scan.Init(strings.NewReader(pattern))
  281. sl := string(os.PathSeparator)
  282. escSL := sl
  283. if sl == `\` {
  284. escSL += `\`
  285. }
  286. for scan.Peek() != scanner.EOF {
  287. ch := scan.Next()
  288. if ch == '*' {
  289. if scan.Peek() == '*' {
  290. // is some flavor of "**"
  291. scan.Next()
  292. // Treat **/ as ** so eat the "/"
  293. if string(scan.Peek()) == sl {
  294. scan.Next()
  295. }
  296. if scan.Peek() == scanner.EOF {
  297. // is "**EOF" - to align with .gitignore just accept all
  298. regStr += ".*"
  299. } else {
  300. // is "**"
  301. // Note that this allows for any # of /'s (even 0) because
  302. // the .* will eat everything, even /'s
  303. regStr += "(.*" + escSL + ")?"
  304. }
  305. } else {
  306. // is "*" so map it to anything but "/"
  307. regStr += "[^" + escSL + "]*"
  308. }
  309. } else if ch == '?' {
  310. // "?" is any char except "/"
  311. regStr += "[^" + escSL + "]"
  312. } else if shouldEscape(ch) {
  313. // Escape some regexp special chars that have no meaning
  314. // in golang's filepath.Match
  315. regStr += `\` + string(ch)
  316. } else if ch == '\\' {
  317. // escape next char. Note that a trailing \ in the pattern
  318. // will be left alone (but need to escape it)
  319. if sl == `\` {
  320. // On windows map "\" to "\\", meaning an escaped backslash,
  321. // and then just continue because filepath.Match on
  322. // Windows doesn't allow escaping at all
  323. regStr += escSL
  324. continue
  325. }
  326. if scan.Peek() != scanner.EOF {
  327. regStr += `\` + string(scan.Next())
  328. } else {
  329. regStr += `\`
  330. }
  331. } else {
  332. regStr += string(ch)
  333. }
  334. }
  335. regStr += "$"
  336. re, err := regexp.Compile(regStr)
  337. if err != nil {
  338. return err
  339. }
  340. p.regexp = re
  341. return nil
  342. }
  343. // Matches returns true if file matches any of the patterns
  344. // and isn't excluded by any of the subsequent patterns.
  345. //
  346. // This implementation is buggy (it only checks a single parent dir against the
  347. // pattern) and will be removed soon. Use MatchesOrParentMatches instead.
  348. func Matches(file string, patterns []string) (bool, error) {
  349. pm, err := NewPatternMatcher(patterns)
  350. if err != nil {
  351. return false, err
  352. }
  353. file = filepath.Clean(file)
  354. if file == "." {
  355. // Don't let them exclude everything, kind of silly.
  356. return false, nil
  357. }
  358. return pm.Matches(file)
  359. }
  360. // MatchesOrParentMatches returns true if file matches any of the patterns
  361. // and isn't excluded by any of the subsequent patterns.
  362. func MatchesOrParentMatches(file string, patterns []string) (bool, error) {
  363. pm, err := NewPatternMatcher(patterns)
  364. if err != nil {
  365. return false, err
  366. }
  367. file = filepath.Clean(file)
  368. if file == "." {
  369. // Don't let them exclude everything, kind of silly.
  370. return false, nil
  371. }
  372. return pm.MatchesOrParentMatches(file)
  373. }
  374. // CopyFile copies from src to dst until either EOF is reached
  375. // on src or an error occurs. It verifies src exists and removes
  376. // the dst if it exists.
  377. func CopyFile(src, dst string) (int64, error) {
  378. cleanSrc := filepath.Clean(src)
  379. cleanDst := filepath.Clean(dst)
  380. if cleanSrc == cleanDst {
  381. return 0, nil
  382. }
  383. sf, err := os.Open(cleanSrc)
  384. if err != nil {
  385. return 0, err
  386. }
  387. defer sf.Close()
  388. if err := os.Remove(cleanDst); err != nil && !os.IsNotExist(err) {
  389. return 0, err
  390. }
  391. df, err := os.Create(cleanDst)
  392. if err != nil {
  393. return 0, err
  394. }
  395. defer df.Close()
  396. return io.Copy(df, sf)
  397. }
  398. // ReadSymlinkedDirectory returns the target directory of a symlink.
  399. // The target of the symbolic link may not be a file.
  400. func ReadSymlinkedDirectory(path string) (string, error) {
  401. var realPath string
  402. var err error
  403. if realPath, err = filepath.Abs(path); err != nil {
  404. return "", fmt.Errorf("unable to get absolute path for %s: %s", path, err)
  405. }
  406. if realPath, err = filepath.EvalSymlinks(realPath); err != nil {
  407. return "", fmt.Errorf("failed to canonicalise path for %s: %s", path, err)
  408. }
  409. realPathInfo, err := os.Stat(realPath)
  410. if err != nil {
  411. return "", fmt.Errorf("failed to stat target '%s' of '%s': %s", realPath, path, err)
  412. }
  413. if !realPathInfo.Mode().IsDir() {
  414. return "", fmt.Errorf("canonical path points to a file '%s'", realPath)
  415. }
  416. return realPath, nil
  417. }
  418. // CreateIfNotExists creates a file or a directory only if it does not already exist.
  419. func CreateIfNotExists(path string, isDir bool) error {
  420. if _, err := os.Stat(path); err != nil {
  421. if os.IsNotExist(err) {
  422. if isDir {
  423. return os.MkdirAll(path, 0755)
  424. }
  425. if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
  426. return err
  427. }
  428. f, err := os.OpenFile(path, os.O_CREATE, 0755)
  429. if err != nil {
  430. return err
  431. }
  432. f.Close()
  433. }
  434. }
  435. return nil
  436. }