format.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package tar
  5. import "strings"
// Format represents the tar archive format.
//
// The original tar format was introduced in Unix V7.
// Since then, there have been multiple competing formats attempting to
// standardize or extend the V7 format to overcome its limitations.
// The most common formats are the USTAR, PAX, and GNU formats,
// each with its own advantages and limitations.
//
// The following table captures the capabilities of each format:
//
//	                  |  USTAR |       PAX |       GNU
//	------------------+--------+-----------+----------
//	Name              |   256B | unlimited | unlimited
//	Linkname          |   100B | unlimited | unlimited
//	Size              | uint33 | unlimited |    uint89
//	Mode              | uint21 |    uint21 |    uint57
//	Uid/Gid           | uint21 | unlimited |    uint57
//	Uname/Gname       |    32B | unlimited |       32B
//	ModTime           | uint33 | unlimited |     int89
//	AccessTime        |    n/a | unlimited |     int89
//	ChangeTime        |    n/a | unlimited |     int89
//	Devmajor/Devminor | uint21 |    uint21 |    uint57
//	------------------+--------+-----------+----------
//	string encoding   |  ASCII |     UTF-8 |    binary
//	sub-second times  |     no |       yes |        no
//	sparse files      |     no |       yes |       yes
//
// The table's upper portion shows the Header fields, where each format reports
// the maximum number of bytes allowed for each string field and
// the integer type used to store each numeric field
// (where timestamps are stored as the number of seconds since the Unix epoch).
//
// The table's lower portion shows specialized features of each format,
// such as supported string encodings, support for sub-second timestamps,
// or support for sparse files.
//
// The Writer currently provides no support for sparse files.
type Format int
// Constants to identify various tar formats.
//
// Apart from FormatUnknown, every constant occupies its own bit, so several
// formats can be combined in a single Format value with bitwise operators.
const (
	// Deliberately hide the meaning of constants from public API.
	_ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc...

	// FormatUnknown indicates that the format is unknown.
	// It is the second 0 of the sequence above, i.e. the zero value of Format.
	FormatUnknown

	// The format of the original Unix V7 tar tool prior to standardization.
	formatV7

	// FormatUSTAR represents the USTAR header format defined in POSIX.1-1988.
	//
	// While this format is compatible with most tar readers,
	// the format has several limitations making it unsuitable for some usages.
	// Most notably, it cannot support sparse files, files larger than 8GiB,
	// filenames larger than 256 characters, and non-ASCII filenames.
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06
	FormatUSTAR

	// FormatPAX represents the PAX header format defined in POSIX.1-2001.
	//
	// PAX extends USTAR by writing a special file with Typeflag TypeXHeader
	// preceding the original header. This file contains a set of key-value
	// records, which are used to overcome USTAR's shortcomings, in addition to
	// providing the ability to have sub-second resolution for timestamps.
	//
	// Some newer formats add their own extensions to PAX by defining their
	// own keys and assigning certain semantic meaning to the associated values.
	// For example, sparse file support in PAX is implemented using keys
	// defined by the GNU manual (e.g., "GNU.sparse.map").
	//
	// Reference:
	//	http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html
	FormatPAX

	// FormatGNU represents the GNU header format.
	//
	// The GNU header format is older than the USTAR and PAX standards and
	// is not compatible with them. The GNU format supports
	// arbitrary file sizes, filenames of arbitrary encoding and length,
	// sparse files, and other features.
	//
	// It is recommended that PAX be chosen over GNU unless the target
	// application can only parse GNU formatted archives.
	//
	// Reference:
	//	https://www.gnu.org/software/tar/manual/html_node/Standard.html
	FormatGNU

	// Schily's tar format, which is incompatible with USTAR.
	// This does not cover STAR extensions to the PAX format; these fall under
	// the PAX format.
	formatSTAR

	// formatMax is the bit just above the highest defined format.
	// Format.String uses it as the exclusive upper bound when it walks
	// the individual format bits.
	formatMax
)
  96. func (f Format) has(f2 Format) bool { return f&f2 != 0 }
  97. func (f *Format) mayBe(f2 Format) { *f |= f2 }
  98. func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 }
  99. func (f *Format) mustNotBe(f2 Format) { *f &^= f2 }
  100. var formatNames = map[Format]string{
  101. formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR",
  102. }
  103. func (f Format) String() string {
  104. var ss []string
  105. for f2 := Format(1); f2 < formatMax; f2 <<= 1 {
  106. if f.has(f2) {
  107. ss = append(ss, formatNames[f2])
  108. }
  109. }
  110. switch len(ss) {
  111. case 0:
  112. return "<unknown>"
  113. case 1:
  114. return ss[0]
  115. default:
  116. return "(" + strings.Join(ss, " | ") + ")"
  117. }
  118. }
  119. // Magics used to identify various formats.
  120. const (
  121. magicGNU, versionGNU = "ustar ", " \x00"
  122. magicUSTAR, versionUSTAR = "ustar\x00", "00"
  123. trailerSTAR = "tar\x00"
  124. )
  125. // Size constants from various tar specifications.
  126. const (
  127. blockSize = 512 // Size of each block in a tar stream
  128. nameSize = 100 // Max length of the name field in USTAR format
  129. prefixSize = 155 // Max length of the prefix field in USTAR format
  130. )
  131. // blockPadding computes the number of bytes needed to pad offset up to the
  132. // nearest block edge where 0 <= n < blockSize.
  133. func blockPadding(offset int64) (n int64) {
  134. return -offset & (blockSize - 1)
  135. }
  136. var zeroBlock block
  137. type block [blockSize]byte
  138. // Convert block to any number of formats.
  139. func (b *block) V7() *headerV7 { return (*headerV7)(b) }
  140. func (b *block) GNU() *headerGNU { return (*headerGNU)(b) }
  141. func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) }
  142. func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) }
  143. func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) }
  144. // GetFormat checks that the block is a valid tar header based on the checksum.
  145. // It then attempts to guess the specific format based on magic values.
  146. // If the checksum fails, then FormatUnknown is returned.
  147. func (b *block) GetFormat() Format {
  148. // Verify checksum.
  149. var p parser
  150. value := p.parseOctal(b.V7().Chksum())
  151. chksum1, chksum2 := b.ComputeChecksum()
  152. if p.err != nil || (value != chksum1 && value != chksum2) {
  153. return FormatUnknown
  154. }
  155. // Guess the magic values.
  156. magic := string(b.USTAR().Magic())
  157. version := string(b.USTAR().Version())
  158. trailer := string(b.STAR().Trailer())
  159. switch {
  160. case magic == magicUSTAR && trailer == trailerSTAR:
  161. return formatSTAR
  162. case magic == magicUSTAR:
  163. return FormatUSTAR | FormatPAX
  164. case magic == magicGNU && version == versionGNU:
  165. return FormatGNU
  166. default:
  167. return formatV7
  168. }
  169. }
  170. // SetFormat writes the magic values necessary for specified format
  171. // and then updates the checksum accordingly.
  172. func (b *block) SetFormat(format Format) {
  173. // Set the magic values.
  174. switch {
  175. case format.has(formatV7):
  176. // Do nothing.
  177. case format.has(FormatGNU):
  178. copy(b.GNU().Magic(), magicGNU)
  179. copy(b.GNU().Version(), versionGNU)
  180. case format.has(formatSTAR):
  181. copy(b.STAR().Magic(), magicUSTAR)
  182. copy(b.STAR().Version(), versionUSTAR)
  183. copy(b.STAR().Trailer(), trailerSTAR)
  184. case format.has(FormatUSTAR | FormatPAX):
  185. copy(b.USTAR().Magic(), magicUSTAR)
  186. copy(b.USTAR().Version(), versionUSTAR)
  187. default:
  188. panic("invalid format")
  189. }
  190. // Update checksum.
  191. // This field is special in that it is terminated by a NULL then space.
  192. var f formatter
  193. field := b.V7().Chksum()
  194. chksum, _ := b.ComputeChecksum() // Possible values are 256..128776
  195. f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143
  196. field[7] = ' '
  197. }
  198. // ComputeChecksum computes the checksum for the header block.
  199. // POSIX specifies a sum of the unsigned byte values, but the Sun tar used
  200. // signed byte values.
  201. // We compute and return both.
  202. func (b *block) ComputeChecksum() (unsigned, signed int64) {
  203. for i, c := range b {
  204. if 148 <= i && i < 156 {
  205. c = ' ' // Treat the checksum field itself as all spaces.
  206. }
  207. unsigned += int64(c)
  208. signed += int64(int8(c))
  209. }
  210. return unsigned, signed
  211. }
  212. // Reset clears the block with all zeros.
  213. func (b *block) Reset() {
  214. *b = block{}
  215. }
  216. type headerV7 [blockSize]byte
  217. func (h *headerV7) Name() []byte { return h[000:][:100] }
  218. func (h *headerV7) Mode() []byte { return h[100:][:8] }
  219. func (h *headerV7) UID() []byte { return h[108:][:8] }
  220. func (h *headerV7) GID() []byte { return h[116:][:8] }
  221. func (h *headerV7) Size() []byte { return h[124:][:12] }
  222. func (h *headerV7) ModTime() []byte { return h[136:][:12] }
  223. func (h *headerV7) Chksum() []byte { return h[148:][:8] }
  224. func (h *headerV7) TypeFlag() []byte { return h[156:][:1] }
  225. func (h *headerV7) LinkName() []byte { return h[157:][:100] }
  226. type headerGNU [blockSize]byte
  227. func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) }
  228. func (h *headerGNU) Magic() []byte { return h[257:][:6] }
  229. func (h *headerGNU) Version() []byte { return h[263:][:2] }
  230. func (h *headerGNU) UserName() []byte { return h[265:][:32] }
  231. func (h *headerGNU) GroupName() []byte { return h[297:][:32] }
  232. func (h *headerGNU) DevMajor() []byte { return h[329:][:8] }
  233. func (h *headerGNU) DevMinor() []byte { return h[337:][:8] }
  234. func (h *headerGNU) AccessTime() []byte { return h[345:][:12] }
  235. func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] }
  236. func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) }
  237. func (h *headerGNU) RealSize() []byte { return h[483:][:12] }
  238. type headerSTAR [blockSize]byte
  239. func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) }
  240. func (h *headerSTAR) Magic() []byte { return h[257:][:6] }
  241. func (h *headerSTAR) Version() []byte { return h[263:][:2] }
  242. func (h *headerSTAR) UserName() []byte { return h[265:][:32] }
  243. func (h *headerSTAR) GroupName() []byte { return h[297:][:32] }
  244. func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] }
  245. func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] }
  246. func (h *headerSTAR) Prefix() []byte { return h[345:][:131] }
  247. func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] }
  248. func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] }
  249. func (h *headerSTAR) Trailer() []byte { return h[508:][:4] }
  250. type headerUSTAR [blockSize]byte
  251. func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) }
  252. func (h *headerUSTAR) Magic() []byte { return h[257:][:6] }
  253. func (h *headerUSTAR) Version() []byte { return h[263:][:2] }
  254. func (h *headerUSTAR) UserName() []byte { return h[265:][:32] }
  255. func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] }
  256. func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] }
  257. func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] }
  258. func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] }
  259. type sparseArray []byte
  260. func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) }
  261. func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] }
  262. func (s sparseArray) MaxEntries() int { return len(s) / 24 }
  263. type sparseElem []byte
  264. func (s sparseElem) Offset() []byte { return s[00:][:12] }
  265. func (s sparseElem) Length() []byte { return s[12:][:12] }