123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package tar
- import (
- "bytes"
- "fmt"
- "strconv"
- "strings"
- "time"
- )
// hasNUL reports whether s contains at least one NUL (0x00) byte.
func hasNUL(s string) bool {
	return strings.Contains(s, "\x00")
}
// isASCII reports whether s is usable as an ASCII C-style string, i.e. every
// byte lies in the range 0x01 through 0x7f. The empty string qualifies.
func isASCII(s string) bool {
	// Scanning bytes is sufficient: any byte >= 0x80 belongs either to a
	// multi-byte rune or to an invalid sequence, and both are non-ASCII.
	for i := 0; i < len(s); i++ {
		if b := s[i]; b == 0x00 || b >= 0x80 {
			return false
		}
	}
	return true
}
- // toASCII converts the input to an ASCII C-style string.
- // This a best effort conversion, so invalid characters are dropped.
- func toASCII(s string) string {
- if isASCII(s) {
- return s
- }
- b := make([]byte, 0, len(s))
- for _, c := range s {
- if c < 0x80 && c != 0x00 {
- b = append(b, byte(c))
- }
- }
- return string(b)
- }
// parser is the receiver for the header-field decoding methods. Those
// methods report failure by storing an error in err instead of returning it.
type parser struct {
	// err is the last error seen.
	err error
}
// formatter is the receiver for the header-field encoding methods. Those
// methods report failure by storing an error in err instead of returning it.
type formatter struct {
	// err is the last error seen.
	err error
}
- // parseString parses bytes as a NUL-terminated C-style string.
- // If a NUL byte is not found then the whole slice is returned as a string.
- func (*parser) parseString(b []byte) string {
- if i := bytes.IndexByte(b, 0); i >= 0 {
- return string(b[:i])
- }
- return string(b)
- }
- // formatString copies s into b, NUL-terminating if possible.
- func (f *formatter) formatString(b []byte, s string) {
- if len(s) > len(b) {
- f.err = ErrFieldTooLong
- }
- copy(b, s)
- if len(s) < len(b) {
- b[len(s)] = 0
- }
- // Some buggy readers treat regular files with a trailing slash
- // in the V7 path field as a directory even though the full path
- // recorded elsewhere (e.g., via PAX record) contains no trailing slash.
- if len(s) > len(b) && b[len(b)-1] == '/' {
- n := len(strings.TrimRight(s[:len(b)], "/"))
- b[n] = 0 // Replace trailing slash with NUL terminator
- }
- }
// fitsInBase256 reports whether x can be stored in an n-byte field using
// base-256 (binary) encoding. Base-256 fields need no trailing NUL, so all
// n bytes carry data.
//
// The check assumes strict GNU binary mode, in which the first byte may only
// be 0x80 or 0xff and therefore acts purely as a two's complement sign
// marker. That leaves n-1 bytes for the magnitude.
func fitsInBase256(n int, x int64) bool {
	if n >= 9 {
		return true // Eight payload bytes hold any int64
	}
	binBits := uint(n-1) * 8
	lo, hi := int64(-1)<<binBits, int64(1)<<binBits
	return lo <= x && x < hi
}
- // parseNumeric parses the input as being encoded in either base-256 or octal.
- // This function may return negative numbers.
- // If parsing fails or an integer overflow occurs, err will be set.
- func (p *parser) parseNumeric(b []byte) int64 {
- // Check for base-256 (binary) format first.
- // If the first bit is set, then all following bits constitute a two's
- // complement encoded number in big-endian byte order.
- if len(b) > 0 && b[0]&0x80 != 0 {
- // Handling negative numbers relies on the following identity:
- // -a-1 == ^a
- //
- // If the number is negative, we use an inversion mask to invert the
- // data bytes and treat the value as an unsigned number.
- var inv byte // 0x00 if positive or zero, 0xff if negative
- if b[0]&0x40 != 0 {
- inv = 0xff
- }
- var x uint64
- for i, c := range b {
- c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
- if i == 0 {
- c &= 0x7f // Ignore signal bit in first byte
- }
- if (x >> 56) > 0 {
- p.err = ErrHeader // Integer overflow
- return 0
- }
- x = x<<8 | uint64(c)
- }
- if (x >> 63) > 0 {
- p.err = ErrHeader // Integer overflow
- return 0
- }
- if inv == 0xff {
- return ^int64(x)
- }
- return int64(x)
- }
- // Normal case is base-8 (octal) format.
- return p.parseOctal(b)
- }
- // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
- // Otherwise it will attempt to use base-256 (binary) encoding.
- func (f *formatter) formatNumeric(b []byte, x int64) {
- if fitsInOctal(len(b), x) {
- f.formatOctal(b, x)
- return
- }
- if fitsInBase256(len(b), x) {
- for i := len(b) - 1; i >= 0; i-- {
- b[i] = byte(x)
- x >>= 8
- }
- b[0] |= 0x80 // Highest bit indicates binary format
- return
- }
- f.formatOctal(b, 0) // Last resort, just write zero
- f.err = ErrFieldTooLong
- }
- func (p *parser) parseOctal(b []byte) int64 {
- // Because unused fields are filled with NULs, we need
- // to skip leading NULs. Fields may also be padded with
- // spaces or NULs.
- // So we remove leading and trailing NULs and spaces to
- // be sure.
- b = bytes.Trim(b, " \x00")
- if len(b) == 0 {
- return 0
- }
- x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
- if perr != nil {
- p.err = ErrHeader
- }
- return int64(x)
- }
- func (f *formatter) formatOctal(b []byte, x int64) {
- if !fitsInOctal(len(b), x) {
- x = 0 // Last resort, just write zero
- f.err = ErrFieldTooLong
- }
- s := strconv.FormatInt(x, 8)
- // Add leading zeros, but leave room for a NUL.
- if n := len(b) - len(s) - 1; n > 0 {
- s = strings.Repeat("0", n) + s
- }
- f.formatString(b, s)
- }
// fitsInOctal reports whether x can be written into an n-byte field as
// octal text followed by a NUL terminator. Negative values never fit.
func fitsInOctal(n int, x int64) bool {
	if x < 0 {
		return false
	}
	if n >= 22 {
		return true // 21 octal digits cover every non-negative int64
	}
	octBits := uint(n-1) * 3 // Three bits per digit, one byte kept for NUL
	return x < int64(1)<<octBits
}
- // parsePAXTime takes a string of the form %d.%d as described in the PAX
- // specification. Note that this implementation allows for negative timestamps,
- // which is allowed for by the PAX specification, but not always portable.
- func parsePAXTime(s string) (time.Time, error) {
- const maxNanoSecondDigits = 9
- // Split string into seconds and sub-seconds parts.
- ss, sn := s, ""
- if pos := strings.IndexByte(s, '.'); pos >= 0 {
- ss, sn = s[:pos], s[pos+1:]
- }
- // Parse the seconds.
- secs, err := strconv.ParseInt(ss, 10, 64)
- if err != nil {
- return time.Time{}, ErrHeader
- }
- if len(sn) == 0 {
- return time.Unix(secs, 0), nil // No sub-second values
- }
- // Parse the nanoseconds.
- if strings.Trim(sn, "0123456789") != "" {
- return time.Time{}, ErrHeader
- }
- if len(sn) < maxNanoSecondDigits {
- sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
- } else {
- sn = sn[:maxNanoSecondDigits] // Right truncate
- }
- nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
- if len(ss) > 0 && ss[0] == '-' {
- return time.Unix(secs, -1*nsecs), nil // Negative correction
- }
- return time.Unix(secs, nsecs), nil
- }
// formatPAXTime renders ts in the %d.%d form described in the PAX
// specification, including timestamps before the Unix epoch. Trailing zeros
// of the fraction are omitted, and a timestamp with no sub-second component
// is written as a bare integer.
func formatPAXTime(ts time.Time) (s string) {
	secs := ts.Unix()
	nsecs := ts.Nanosecond()
	if nsecs == 0 {
		return strconv.FormatInt(secs, 10)
	}

	// Unix() is floored and Nanosecond() is never negative, so a time before
	// the epoch must be converted to sign-and-magnitude form.
	var sign string
	if secs < 0 {
		sign = "-"         // Remember sign
		secs = -(secs + 1) // Add a second to secs
		nsecs = 1e9 - nsecs // Take that second away from nsecs
	}

	// nsecs is non-zero here, so trimming zeros cannot consume the fraction.
	frac := strings.TrimRight(fmt.Sprintf("%09d", nsecs), "0")
	return sign + strconv.FormatInt(secs, 10) + "." + frac
}
- // parsePAXRecord parses the input PAX record string into a key-value pair.
- // If parsing is successful, it will slice off the currently read record and
- // return the remainder as r.
- func parsePAXRecord(s string) (k, v, r string, err error) {
- // The size field ends at the first space.
- sp := strings.IndexByte(s, ' ')
- if sp == -1 {
- return "", "", s, ErrHeader
- }
- // Parse the first token as a decimal integer.
- n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
- if perr != nil || n < 5 || int64(len(s)) < n {
- return "", "", s, ErrHeader
- }
- // Extract everything between the space and the final newline.
- rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
- if nl != "\n" {
- return "", "", s, ErrHeader
- }
- // The first equals separates the key from the value.
- eq := strings.IndexByte(rec, '=')
- if eq == -1 {
- return "", "", s, ErrHeader
- }
- k, v = rec[:eq], rec[eq+1:]
- if !validPAXRecord(k, v) {
- return "", "", s, ErrHeader
- }
- return k, v, rem, nil
- }
- // formatPAXRecord formats a single PAX record, prefixing it with the
- // appropriate length.
- func formatPAXRecord(k, v string) (string, error) {
- if !validPAXRecord(k, v) {
- return "", ErrHeader
- }
- const padding = 3 // Extra padding for ' ', '=', and '\n'
- size := len(k) + len(v) + padding
- size += len(strconv.Itoa(size))
- record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
- // Final adjustment if adding size field increased the record size.
- if len(record) != size {
- size = len(record)
- record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
- }
- return record, nil
- }
- // validPAXRecord reports whether the key-value pair is valid where each
- // record is formatted as:
- // "%d %s=%s\n" % (size, key, value)
- //
- // Keys and values should be UTF-8, but the number of bad writers out there
- // forces us to be a more liberal.
- // Thus, we only reject all keys with NUL, and only reject NULs in values
- // for the PAX version of the USTAR string fields.
- // The key must not contain an '=' character.
- func validPAXRecord(k, v string) bool {
- if k == "" || strings.IndexByte(k, '=') >= 0 {
- return false
- }
- switch k {
- case paxPath, paxLinkpath, paxUname, paxGname:
- return !hasNUL(v)
- default:
- return !hasNUL(k)
- }
- }
|