regexp.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. package reference
  2. import (
  3. "regexp"
  4. "strings"
  5. )
  6. // DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:<encoded>").
  7. var DigestRegexp = regexp.MustCompile(digestPat)
  8. // DomainRegexp matches hostname or IP-addresses, optionally including a port
  9. // number. It defines the structure of potential domain components that may be
  10. // part of image names. This is purposely a subset of what is allowed by DNS to
  11. // ensure backwards compatibility with Docker image names. It may be a subset of
  12. // DNS domain name, an IPv4 address in decimal format, or an IPv6 address between
  13. // square brackets (excluding zone identifiers as defined by [RFC 6874] or special
  14. // addresses such as IPv4-Mapped).
  15. //
  16. // [RFC 6874]: https://www.rfc-editor.org/rfc/rfc6874.
  17. var DomainRegexp = regexp.MustCompile(domainAndPort)
  18. // IdentifierRegexp is the format for string identifier used as a
  19. // content addressable identifier using sha256. These identifiers
  20. // are like digests without the algorithm, since sha256 is used.
  21. var IdentifierRegexp = regexp.MustCompile(identifier)
  22. // NameRegexp is the format for the name component of references, including
  23. // an optional domain and port, but without tag or digest suffix.
  24. var NameRegexp = regexp.MustCompile(namePat)
  25. // ReferenceRegexp is the full supported format of a reference. The regexp
  26. // is anchored and has capturing groups for name, tag, and digest
  27. // components.
  28. var ReferenceRegexp = regexp.MustCompile(referencePat)
  29. // TagRegexp matches valid tag names. From [docker/docker:graph/tags.go].
  30. //
  31. // [docker/docker:graph/tags.go]: https://github.com/moby/moby/blob/v1.6.0/graph/tags.go#L26-L28
  32. var TagRegexp = regexp.MustCompile(tag)
  // Pattern atoms. These are plain regular-expression fragments that the
  // composed patterns and compiled regexps in this file are built from.
  const (
  	// alphanumeric is the basic atom of a name component: one or more
  	// lower-case ASCII letters or digits.
  	alphanumeric = `[a-z0-9]+`

  	// separator lists what may sit between two alphanumeric atoms inside a
  	// name component: a single period, a single underscore, a double
  	// underscore, or a run of one or more dashes. Dashes and underscores are
  	// intentionally treated differently: repeated dashes are accepted so
  	// that valid hostnames can be used as name components, while the double
  	// underscore was added to loosen the restriction for previously
  	// supported names.
  	separator = `(?:[._]|__|[-]+)`

  	// localhost is the host name that is treated as a special value for
  	// domain-name; any other domain-name without a "." or a ":port" is
  	// considered a path component.
  	// NOTE(review): that special-casing is not implemented in this block;
  	// confirm against the code that consumes this constant.
  	localhost = `localhost`

  	// domainNameComponent is a single label of the registry domain: ASCII
  	// letters and digits of either case, with dashes allowed in the middle
  	// but never as the first or last character.
  	domainNameComponent = `(?:[a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])`

  	// optionalPort is an optional port number together with its leading
  	// colon (e.g. ":80").
  	optionalPort = `(?::[0-9]+)?`

  	// tag is a valid tag name: one word character followed by at most 127
  	// word characters, periods or dashes. From docker/docker:graph/tags.go.
  	tag = `[\w][\w.-]{0,127}`

  	// digestPat is a well-formed digest, including the algorithm
  	// (e.g. "sha256:<encoded>"): one or more algorithm components joined by
  	// one of "-", "_", "+" or ".", then a colon, then at least 32
  	// hexadecimal digits.
  	//
  	// TODO(thaJeztah): this should follow the same rules as https://pkg.go.dev/github.com/opencontainers/go-digest@v1.0.0#DigestRegexp
  	// so that go-digest defines the canonical format. Note that go-digest is
  	// more relaxed:
  	//   - it allows multiple algorithms (e.g. "sha256+b64:<encoded>") to allow
  	//     future expansion of supported algorithms.
  	//   - it allows the "<encoded>" value to use urlsafe base64 encoding as
  	//     defined in [rfc4648, section 5].
  	//
  	// [rfc4648, section 5]: https://www.rfc-editor.org/rfc/rfc4648#section-5
  	digestPat = `[A-Za-z][A-Za-z0-9]*(?:[-_+.][A-Za-z][A-Za-z0-9]*)*[:][[:xdigit:]]{32,}`

  	// identifier is a content addressable identifier based on sha256:
  	// exactly 64 lower-case hexadecimal characters, wrapped in a capturing
  	// group. It looks like a digest with the algorithm left out.
  	identifier = `([a-f0-9]{64})`

  	// ipv6address is an IPv6 address enclosed in square brackets. IPv6 has
  	// many textual representations (see rfc5952); only the compressed and
  	// uncompressed forms are meant to be accepted, while zone identifiers
  	// (rfc6874) and special addresses such as IPv4-Mapped are deliberately
  	// excluded. The pattern only restricts the bracketed text to hexadecimal
  	// digits and colons; it does not validate the address structure.
  	ipv6address = `\[(?:[a-fA-F0-9:]+)\]`
  )
  77. var (
  78. // domainName defines the structure of potential domain components
  79. // that may be part of image names. This is purposely a subset of what is
  80. // allowed by DNS to ensure backwards compatibility with Docker image
  81. // names. This includes IPv4 addresses on decimal format.
  82. domainName = domainNameComponent + anyTimes(`\.`+domainNameComponent)
  83. // host defines the structure of potential domains based on the URI
  84. // Host subcomponent on rfc3986. It may be a subset of DNS domain name,
  85. // or an IPv4 address in decimal format, or an IPv6 address between square
  86. // brackets (excluding zone identifiers as defined by rfc6874 or special
  87. // addresses such as IPv4-Mapped).
  88. host = `(?:` + domainName + `|` + ipv6address + `)`
  89. // allowed by the URI Host subcomponent on rfc3986 to ensure backwards
  90. // compatibility with Docker image names.
  91. domainAndPort = host + optionalPort
  92. // anchoredTagRegexp matches valid tag names, anchored at the start and
  93. // end of the matched string.
  94. anchoredTagRegexp = regexp.MustCompile(anchored(tag))
  95. // anchoredDigestRegexp matches valid digests, anchored at the start and
  96. // end of the matched string.
  97. anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat))
  98. // pathComponent restricts path-components to start with an alphanumeric
  99. // character, with following parts able to be separated by a separator
  100. // (one period, one or two underscore and multiple dashes).
  101. pathComponent = alphanumeric + anyTimes(separator+alphanumeric)
  102. // remoteName matches the remote-name of a repository. It consists of one
  103. // or more forward slash (/) delimited path-components:
  104. //
  105. // pathComponent[[/pathComponent] ...] // e.g., "library/ubuntu"
  106. remoteName = pathComponent + anyTimes(`/`+pathComponent)
  107. namePat = optional(domainAndPort+`/`) + remoteName
  108. // anchoredNameRegexp is used to parse a name value, capturing the
  109. // domain and trailing components.
  110. anchoredNameRegexp = regexp.MustCompile(anchored(optional(capture(domainAndPort), `/`), capture(remoteName)))
  111. referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat)))
  112. // anchoredIdentifierRegexp is used to check or match an
  113. // identifier value, anchored at start and end of string.
  114. anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier))
  115. )
  // optional concatenates the given expressions, wraps the result in a
  // non-capturing group, and marks that group as optional ("?").
  func optional(res ...string) string {
  	var b strings.Builder
  	b.WriteString(`(?:`)
  	for _, re := range res {
  		b.WriteString(re)
  	}
  	b.WriteString(`)?`)
  	return b.String()
  }
  // anyTimes concatenates the given expressions and wraps the result in a
  // non-capturing group that may repeat zero or more times ("*").
  func anyTimes(res ...string) string {
  	parts := make([]string, 0, len(res)+2)
  	parts = append(parts, `(?:`)
  	parts = append(parts, res...)
  	parts = append(parts, `)*`)
  	return strings.Join(parts, "")
  }
  // capture concatenates the given expressions and wraps the result in a
  // capturing group.
  func capture(res ...string) string {
  	inner := strings.Join(res, "")
  	return "(" + inner + ")"
  }
  // anchored concatenates the given expressions and anchors the result by
  // putting the start-of-text ("^") and end-of-text ("$") delimiters around it.
  func anchored(res ...string) string {
  	const (
  		start = "^"
  		end   = "$"
  	)
  	expr := strings.Join(res, "")
  	return start + expr + end
  }