123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package tar
- import "strings"
// Format identifies a tar archive format.
//
// Tar originated in Unix V7. Since then, several competing formats have tried
// to standardize or extend the V7 layout to get past its limitations.
// The ones encountered most often are USTAR, PAX, and GNU,
// each with its own advantages and limitations.
//
// Capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The upper part of the table lists the Header fields. For each string field
// it gives the maximum number of bytes a format allows, and for each numeric
// field it gives the integer type the format stores it in
// (timestamps being stored as the number of seconds since the Unix epoch).
//
// The lower part of the table lists specialized features of each format:
// the supported string encoding, whether sub-second timestamps are supported,
// and whether sparse files are supported.
//
// The Writer currently provides no support for sparse files.
type Format int

// Constants to identify various tar formats.
const (
	// The leading blank entry exists so that the public API exposes an
	// expression rather than the meaning of each constant.
	// Successive iota values yield 0, 0, 1, 2, 4, 8, ... so FormatUnknown
	// is zero and every later constant occupies its own bit.
	_ Format = (1 << iota) / 4

	// FormatUnknown indicates that the format is unknown.
	FormatUnknown

	// formatV7 is the format of the original Unix V7 tar tool,
	// prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// Most tar readers understand this format, but its limitations make it
	// unsuitable for some usages. Most notably, it cannot support sparse
	// files, files larger than 8GiB, filenames larger than 256 characters,
	// and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// ahead of the original header. That file holds a set of key-value
	// records which overcome USTAR's shortcomings and additionally allow
	// timestamps with sub-second resolution.
	//
	// Some newer formats extend PAX further by defining their own keys and
	// giving the associated values a particular meaning. For example,
	// sparse file support in PAX is implemented using keys defined by the
	// GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format predates the USTAR and PAX standards and is not
	// compatible with them. It supports arbitrary file sizes, filenames of
	// arbitrary encoding and length, sparse files, and other features.
	//
	// PAX is the recommended choice over GNU unless the target application
	// can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// formatSTAR is Schily's tar format, which is incompatible with USTAR.
	// STAR extensions to the PAX format are not covered here; those fall
	// under the PAX format.
	formatSTAR

	// formatMax is one bit past the last defined format and bounds
	// iteration over the format bits.
	formatMax
)

// has reports whether f and f2 have at least one format bit in common.
func (f Format) has(f2 Format) bool {
	return (f & f2) != 0
}

// mayBe adds every format bit of f2 to f.
func (f *Format) mayBe(f2 Format) {
	*f = *f | f2
}

// mayOnlyBe keeps in f only the format bits that are also set in f2.
func (f *Format) mayOnlyBe(f2 Format) {
	*f = *f & f2
}

// mustNotBe clears from f every format bit that is set in f2.
func (f *Format) mustNotBe(f2 Format) {
	*f = *f &^ f2
}

// formatNames maps each single-format bit to the name printed by String.
var formatNames = map[Format]string{
	formatV7:    "V7",
	FormatUSTAR: "USTAR",
	FormatPAX:   "PAX",
	FormatGNU:   "GNU",
	formatSTAR:  "STAR",
}

// String returns the names of the formats set in f.
// No recognized bit yields "<unknown>", a single bit yields its bare name,
// and several bits yield the names in ascending bit order, joined by " | "
// and wrapped in parentheses.
func (f Format) String() string {
	var names []string
	for bit := Format(1); bit < formatMax; bit <<= 1 {
		if !f.has(bit) {
			continue
		}
		names = append(names, formatNames[bit])
	}

	if len(names) == 0 {
		return "<unknown>"
	}
	if len(names) == 1 {
		return names[0]
	}
	return "(" + strings.Join(names, " | ") + ")"
}
// Magic, version, and trailer byte strings used to identify various formats.
// Magic strings are six bytes and version strings are two bytes.
const (
	magicGNU   = "ustar "
	versionGNU = " \x00"

	magicUSTAR   = "ustar\x00"
	versionUSTAR = "00"

	trailerSTAR = "tar\x00"
)
// Size constants from various tar specifications.
const (
	// blockSize is the size of each block in a tar stream.
	blockSize = 512

	// nameSize is the max length of the name field in USTAR format.
	nameSize = 100

	// prefixSize is the max length of the prefix field in USTAR format.
	prefixSize = 155
)
- // blockPadding computes the number of bytes needed to pad offset up to the
- // nearest block edge where 0 <= n < blockSize.
- func blockPadding(offset int64) (n int64) {
- return -offset & (blockSize - 1)
- }
- var zeroBlock block
- type block [blockSize]byte
- // Convert block to any number of formats.
- func (b *block) V7() *headerV7 { return (*headerV7)(b) }
- func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
- func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
- func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
- func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
- // GetFormat checks that the block is a valid tar header based on the checksum.
- // It then attempts to guess the specific format based on magic values.
- // If the checksum fails, then FormatUnknown is returned.
- func (b *block) GetFormat() Format {
- // Verify checksum.
- var p parser
- value := p.parseOctal(b.V7().Chksum())
- chksum1, chksum2 := b.ComputeChecksum()
- if p.err != nil || (value != chksum1 && value != chksum2) {
- return FormatUnknown
- }
- // Guess the magic values.
- magic := string(b.USTAR().Magic())
- version := string(b.USTAR().Version())
- trailer := string(b.STAR().Trailer())
- switch {
- case magic == magicUSTAR && trailer == trailerSTAR:
- return formatSTAR
- case magic == magicUSTAR:
- return FormatUSTAR | FormatPAX
- case magic == magicGNU && version == versionGNU:
- return FormatGNU
- default:
- return formatV7
- }
- }
- // SetFormat writes the magic values necessary for specified format
- // and then updates the checksum accordingly.
- func (b *block) SetFormat(format Format) {
- // Set the magic values.
- switch {
- case format.has(formatV7):
- // Do nothing.
- case format.has(FormatGNU):
- copy(b.GNU().Magic(), magicGNU)
- copy(b.GNU().Version(), versionGNU)
- case format.has(formatSTAR):
- copy(b.STAR().Magic(), magicUSTAR)
- copy(b.STAR().Version(), versionUSTAR)
- copy(b.STAR().Trailer(), trailerSTAR)
- case format.has(FormatUSTAR | FormatPAX):
- copy(b.USTAR().Magic(), magicUSTAR)
- copy(b.USTAR().Version(), versionUSTAR)
- default:
- panic("invalid format")
- }
- // Update checksum.
- // This field is special in that it is terminated by a NULL then space.
- var f formatter
- field := b.V7().Chksum()
- chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
- f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
- field[7] = ' '
- }
- // ComputeChecksum computes the checksum for the header block.
- // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
- // signed byte values.
- // We compute and return both.
- func (b *block) ComputeChecksum() (unsigned, signed int64) {
- for i, c := range b {
- if 148 <= i && i < 156 {
- c = ' ' // Treat the checksum field itself as all spaces.
- }
- unsigned += int64(c)
- signed += int64(int8(c))
- }
- return unsigned, signed
- }
- // Reset clears the block with all zeros.
- func (b *block) Reset() {
- *b = block{}
- }
- type headerV7 [blockSize]byte
- func (h *headerV7) Name() []byte { return h[000:][:100] }
- func (h *headerV7) Mode() []byte { return h[100:][:8] }
- func (h *headerV7) UID() []byte { return h[108:][:8] }
- func (h *headerV7) GID() []byte { return h[116:][:8] }
- func (h *headerV7) Size() []byte { return h[124:][:12] }
- func (h *headerV7) ModTime() []byte { return h[136:][:12] }
- func (h *headerV7) Chksum() []byte { return h[148:][:8] }
- func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
- func (h *headerV7) LinkName() []byte { return h[157:][:100] }
- type headerGNU [blockSize]byte
- func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
- func (h *headerGNU) Magic() []byte { return h[257:][:6] }
- func (h *headerGNU) Version() []byte { return h[263:][:2] }
- func (h *headerGNU) UserName() []byte { return h[265:][:32] }
- func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
- func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
- func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
- func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
- func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
- func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
- func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
- type headerSTAR [blockSize]byte
- func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
- func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
- func (h *headerSTAR) Version() []byte { return h[263:][:2] }
- func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
- func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
- func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
- func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
- func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
- func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
- func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
- func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
- type headerUSTAR [blockSize]byte
- func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
- func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
- func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
- func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
- func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
- func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
- func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
- func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
- type sparseArray []byte
- func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
- func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
- func (s sparseArray) MaxEntries() int { return len(s) / 24 }
// sparseElem is a byte slice holding two consecutive 12-byte fields.
// Both accessors return slices that alias the element's bytes.
type sparseElem []byte

// Offset returns the first 12 bytes of the element.
func (s sparseElem) Offset() []byte {
	return s[:12]
}

// Length returns the 12 bytes that follow the first 12.
func (s sparseElem) Length() []byte {
	rest := s[12:]
	return rest[:12]
}
|