strconv.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package tar
  5. import (
  6. "bytes"
  7. "fmt"
  8. "strconv"
  9. "strings"
  10. "time"
  11. )
  12. // hasNUL reports whether the NUL character exists within s.
  13. func hasNUL(s string) bool {
  14. return strings.IndexByte(s, 0) >= 0
  15. }
  16. // isASCII reports whether the input is an ASCII C-style string.
  17. func isASCII(s string) bool {
  18. for _, c := range s {
  19. if c >= 0x80 || c == 0x00 {
  20. return false
  21. }
  22. }
  23. return true
  24. }
  25. // toASCII converts the input to an ASCII C-style string.
  26. // This a best effort conversion, so invalid characters are dropped.
  27. func toASCII(s string) string {
  28. if isASCII(s) {
  29. return s
  30. }
  31. b := make([]byte, 0, len(s))
  32. for _, c := range s {
  33. if c < 0x80 && c != 0x00 {
  34. b = append(b, byte(c))
  35. }
  36. }
  37. return string(b)
  38. }
  39. type parser struct {
  40. err error // Last error seen
  41. }
  42. type formatter struct {
  43. err error // Last error seen
  44. }
  45. // parseString parses bytes as a NUL-terminated C-style string.
  46. // If a NUL byte is not found then the whole slice is returned as a string.
  47. func (*parser) parseString(b []byte) string {
  48. if i := bytes.IndexByte(b, 0); i >= 0 {
  49. return string(b[:i])
  50. }
  51. return string(b)
  52. }
  53. // formatString copies s into b, NUL-terminating if possible.
  54. func (f *formatter) formatString(b []byte, s string) {
  55. if len(s) > len(b) {
  56. f.err = ErrFieldTooLong
  57. }
  58. copy(b, s)
  59. if len(s) < len(b) {
  60. b[len(s)] = 0
  61. }
  62. // Some buggy readers treat regular files with a trailing slash
  63. // in the V7 path field as a directory even though the full path
  64. // recorded elsewhere (e.g., via PAX record) contains no trailing slash.
  65. if len(s) > len(b) && b[len(b)-1] == '/' {
  66. n := len(strings.TrimRight(s[:len(b)], "/"))
  67. b[n] = 0 // Replace trailing slash with NUL terminator
  68. }
  69. }
  70. // fitsInBase256 reports whether x can be encoded into n bytes using base-256
  71. // encoding. Unlike octal encoding, base-256 encoding does not require that the
  72. // string ends with a NUL character. Thus, all n bytes are available for output.
  73. //
  74. // If operating in binary mode, this assumes strict GNU binary mode; which means
  75. // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
  76. // equivalent to the sign bit in two's complement form.
  77. func fitsInBase256(n int, x int64) bool {
  78. binBits := uint(n-1) * 8
  79. return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
  80. }
  81. // parseNumeric parses the input as being encoded in either base-256 or octal.
  82. // This function may return negative numbers.
  83. // If parsing fails or an integer overflow occurs, err will be set.
  84. func (p *parser) parseNumeric(b []byte) int64 {
  85. // Check for base-256 (binary) format first.
  86. // If the first bit is set, then all following bits constitute a two's
  87. // complement encoded number in big-endian byte order.
  88. if len(b) > 0 && b[0]&0x80 != 0 {
  89. // Handling negative numbers relies on the following identity:
  90. // -a-1 == ^a
  91. //
  92. // If the number is negative, we use an inversion mask to invert the
  93. // data bytes and treat the value as an unsigned number.
  94. var inv byte // 0x00 if positive or zero, 0xff if negative
  95. if b[0]&0x40 != 0 {
  96. inv = 0xff
  97. }
  98. var x uint64
  99. for i, c := range b {
  100. c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
  101. if i == 0 {
  102. c &= 0x7f // Ignore signal bit in first byte
  103. }
  104. if (x >> 56) > 0 {
  105. p.err = ErrHeader // Integer overflow
  106. return 0
  107. }
  108. x = x<<8 | uint64(c)
  109. }
  110. if (x >> 63) > 0 {
  111. p.err = ErrHeader // Integer overflow
  112. return 0
  113. }
  114. if inv == 0xff {
  115. return ^int64(x)
  116. }
  117. return int64(x)
  118. }
  119. // Normal case is base-8 (octal) format.
  120. return p.parseOctal(b)
  121. }
  122. // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
  123. // Otherwise it will attempt to use base-256 (binary) encoding.
  124. func (f *formatter) formatNumeric(b []byte, x int64) {
  125. if fitsInOctal(len(b), x) {
  126. f.formatOctal(b, x)
  127. return
  128. }
  129. if fitsInBase256(len(b), x) {
  130. for i := len(b) - 1; i >= 0; i-- {
  131. b[i] = byte(x)
  132. x >>= 8
  133. }
  134. b[0] |= 0x80 // Highest bit indicates binary format
  135. return
  136. }
  137. f.formatOctal(b, 0) // Last resort, just write zero
  138. f.err = ErrFieldTooLong
  139. }
  140. func (p *parser) parseOctal(b []byte) int64 {
  141. // Because unused fields are filled with NULs, we need
  142. // to skip leading NULs. Fields may also be padded with
  143. // spaces or NULs.
  144. // So we remove leading and trailing NULs and spaces to
  145. // be sure.
  146. b = bytes.Trim(b, " \x00")
  147. if len(b) == 0 {
  148. return 0
  149. }
  150. x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
  151. if perr != nil {
  152. p.err = ErrHeader
  153. }
  154. return int64(x)
  155. }
  156. func (f *formatter) formatOctal(b []byte, x int64) {
  157. if !fitsInOctal(len(b), x) {
  158. x = 0 // Last resort, just write zero
  159. f.err = ErrFieldTooLong
  160. }
  161. s := strconv.FormatInt(x, 8)
  162. // Add leading zeros, but leave room for a NUL.
  163. if n := len(b) - len(s) - 1; n > 0 {
  164. s = strings.Repeat("0", n) + s
  165. }
  166. f.formatString(b, s)
  167. }
  168. // fitsInOctal reports whether the integer x fits in a field n-bytes long
  169. // using octal encoding with the appropriate NUL terminator.
  170. func fitsInOctal(n int, x int64) bool {
  171. octBits := uint(n-1) * 3
  172. return x >= 0 && (n >= 22 || x < 1<<octBits)
  173. }
  174. // parsePAXTime takes a string of the form %d.%d as described in the PAX
  175. // specification. Note that this implementation allows for negative timestamps,
  176. // which is allowed for by the PAX specification, but not always portable.
  177. func parsePAXTime(s string) (time.Time, error) {
  178. const maxNanoSecondDigits = 9
  179. // Split string into seconds and sub-seconds parts.
  180. ss, sn := s, ""
  181. if pos := strings.IndexByte(s, '.'); pos >= 0 {
  182. ss, sn = s[:pos], s[pos+1:]
  183. }
  184. // Parse the seconds.
  185. secs, err := strconv.ParseInt(ss, 10, 64)
  186. if err != nil {
  187. return time.Time{}, ErrHeader
  188. }
  189. if len(sn) == 0 {
  190. return time.Unix(secs, 0), nil // No sub-second values
  191. }
  192. // Parse the nanoseconds.
  193. if strings.Trim(sn, "0123456789") != "" {
  194. return time.Time{}, ErrHeader
  195. }
  196. if len(sn) < maxNanoSecondDigits {
  197. sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
  198. } else {
  199. sn = sn[:maxNanoSecondDigits] // Right truncate
  200. }
  201. nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
  202. if len(ss) > 0 && ss[0] == '-' {
  203. return time.Unix(secs, -1*nsecs), nil // Negative correction
  204. }
  205. return time.Unix(secs, nsecs), nil
  206. }
  207. // formatPAXTime converts ts into a time of the form %d.%d as described in the
  208. // PAX specification. This function is capable of negative timestamps.
  209. func formatPAXTime(ts time.Time) (s string) {
  210. secs, nsecs := ts.Unix(), ts.Nanosecond()
  211. if nsecs == 0 {
  212. return strconv.FormatInt(secs, 10)
  213. }
  214. // If seconds is negative, then perform correction.
  215. sign := ""
  216. if secs < 0 {
  217. sign = "-" // Remember sign
  218. secs = -(secs + 1) // Add a second to secs
  219. nsecs = -(nsecs - 1E9) // Take that second away from nsecs
  220. }
  221. return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
  222. }
  223. // parsePAXRecord parses the input PAX record string into a key-value pair.
  224. // If parsing is successful, it will slice off the currently read record and
  225. // return the remainder as r.
  226. func parsePAXRecord(s string) (k, v, r string, err error) {
  227. // The size field ends at the first space.
  228. sp := strings.IndexByte(s, ' ')
  229. if sp == -1 {
  230. return "", "", s, ErrHeader
  231. }
  232. // Parse the first token as a decimal integer.
  233. n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
  234. if perr != nil || n < 5 || int64(len(s)) < n {
  235. return "", "", s, ErrHeader
  236. }
  237. // Extract everything between the space and the final newline.
  238. rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
  239. if nl != "\n" {
  240. return "", "", s, ErrHeader
  241. }
  242. // The first equals separates the key from the value.
  243. eq := strings.IndexByte(rec, '=')
  244. if eq == -1 {
  245. return "", "", s, ErrHeader
  246. }
  247. k, v = rec[:eq], rec[eq+1:]
  248. if !validPAXRecord(k, v) {
  249. return "", "", s, ErrHeader
  250. }
  251. return k, v, rem, nil
  252. }
  253. // formatPAXRecord formats a single PAX record, prefixing it with the
  254. // appropriate length.
  255. func formatPAXRecord(k, v string) (string, error) {
  256. if !validPAXRecord(k, v) {
  257. return "", ErrHeader
  258. }
  259. const padding = 3 // Extra padding for ' ', '=', and '\n'
  260. size := len(k) + len(v) + padding
  261. size += len(strconv.Itoa(size))
  262. record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
  263. // Final adjustment if adding size field increased the record size.
  264. if len(record) != size {
  265. size = len(record)
  266. record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
  267. }
  268. return record, nil
  269. }
  270. // validPAXRecord reports whether the key-value pair is valid where each
  271. // record is formatted as:
  272. // "%d %s=%s\n" % (size, key, value)
  273. //
  274. // Keys and values should be UTF-8, but the number of bad writers out there
  275. // forces us to be a more liberal.
  276. // Thus, we only reject all keys with NUL, and only reject NULs in values
  277. // for the PAX version of the USTAR string fields.
  278. // The key must not contain an '=' character.
  279. func validPAXRecord(k, v string) bool {
  280. if k == "" || strings.IndexByte(k, '=') >= 0 {
  281. return false
  282. }
  283. switch k {
  284. case paxPath, paxLinkpath, paxUname, paxGname:
  285. return !hasNUL(v)
  286. default:
  287. return !hasNUL(k)
  288. }
  289. }