reader.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package tar
  5. // TODO(dsymonds):
  6. // - pax extensions
  7. import (
  8. "bytes"
  9. "errors"
  10. "io"
  11. "io/ioutil"
  12. "math"
  13. "os"
  14. "strconv"
  15. "strings"
  16. "time"
  17. )
  // ErrHeader is the error this file reports whenever a tar header, a PAX
  // record, or a sparse map fails validation.
  var ErrHeader = errors.New("archive/tar: invalid tar header")

  // maxNanoSecondIntSize is the number of fractional digits kept when a PAX
  // timestamp is converted to nanoseconds.
  const maxNanoSecondIntSize = 9
  22. // A Reader provides sequential access to the contents of a tar archive.
  23. // A tar archive consists of a sequence of files.
  24. // The Next method advances to the next file in the archive (including the first),
  25. // and then it can be treated as an io.Reader to access the file's data.
  26. type Reader struct {
  27. r io.Reader
  28. err error
  29. pad int64 // amount of padding (ignored) after current file entry
  30. curr numBytesReader // reader for current file entry
  31. hdrBuff [blockSize]byte // buffer to use in readHeader
  32. }
  // parser holds the error state for the header field parsing helpers, so a
  // caller can run several parses and check for failure once afterwards.
  type parser struct {
  	err error // last error seen, nil if none
  }
  // numBytesReader is an io.Reader that can also report, through numBytes,
  // how many bytes of the underlying encoded data are still unread.
  type numBytesReader interface {
  	io.Reader
  	numBytes() int64
  }
  // regFileReader is the numBytesReader used for ordinary file data stored in
  // a tar archive.
  type regFileReader struct {
  	r  io.Reader // where the entry's bytes come from
  	nb int64     // bytes of the current entry not yet read
  }
  47. // A sparseFileReader is a numBytesReader for reading sparse file data from a
  48. // tar archive.
  49. type sparseFileReader struct {
  50. rfr numBytesReader // Reads the sparse-encoded file data
  51. sp []sparseEntry // The sparse map for the file
  52. pos int64 // Keeps track of file position
  53. total int64 // Total size of the file
  54. }
  // sparseEntry is one element of a sparse file's sparse map.
  //
  // A sparse file is described by a list of sparseEntry values. The name is
  // slightly misleading: each entry marks a fragment of real data that is
  // present in the archive stream. Every region that no entry covers reads
  // back as zeros.
  //
  // Suppose the raw data stored in the archive is the 8 bytes:
  //	var compactData = "abcdefgh"
  //
  // and the sparse map is:
  //	var sp = []sparseEntry{
  //		{offset: 2, numBytes: 5}  // Data fragment for [2..7]
  //		{offset: 18, numBytes: 3} // Data fragment for [18..21]
  //	}
  //
  // For a "real" size of 25, the expanded sparse file is then:
  //	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
  type sparseEntry struct {
  	offset   int64 // where the fragment starts
  	numBytes int64 // how long the fragment is
  }
  // PAX extended-header keywords that describe GNU sparse files.
  const (
  	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
  	paxGNUSparseOffset    = "GNU.sparse.offset"
  	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
  	paxGNUSparseMap       = "GNU.sparse.map"
  	paxGNUSparseName      = "GNU.sparse.name"
  	paxGNUSparseMajor     = "GNU.sparse.major"
  	paxGNUSparseMinor     = "GNU.sparse.minor"
  	paxGNUSparseSize      = "GNU.sparse.size"
  	paxGNUSparseRealSize  = "GNU.sparse.realsize"
  )
  // Layout constants for the old GNU sparse header format: byte offsets into
  // the header blocks, entry counts per block, and field widths.
  const (
  	oldGNUSparseMainHeaderOffset               = 386
  	oldGNUSparseMainHeaderIsExtendedOffset     = 482
  	oldGNUSparseMainHeaderNumEntries           = 4
  	oldGNUSparseExtendedHeaderIsExtendedOffset = 504
  	oldGNUSparseExtendedHeaderNumEntries       = 21
  	oldGNUSparseOffsetSize                     = 12
  	oldGNUSparseNumBytesSize                   = 12
  )
  99. // NewReader creates a new Reader reading from r.
  100. func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
  101. // Next advances to the next entry in the tar archive.
  102. //
  103. // io.EOF is returned at the end of the input.
  104. func (tr *Reader) Next() (*Header, error) {
  105. if tr.err != nil {
  106. return nil, tr.err
  107. }
  108. var hdr *Header
  109. var extHdrs map[string]string
  110. // Externally, Next iterates through the tar archive as if it is a series of
  111. // files. Internally, the tar format often uses fake "files" to add meta
  112. // data that describes the next file. These meta data "files" should not
  113. // normally be visible to the outside. As such, this loop iterates through
  114. // one or more "header files" until it finds a "normal file".
  115. loop:
  116. for {
  117. tr.err = tr.skipUnread()
  118. if tr.err != nil {
  119. return nil, tr.err
  120. }
  121. hdr = tr.readHeader()
  122. if tr.err != nil {
  123. return nil, tr.err
  124. }
  125. // Check for PAX/GNU special headers and files.
  126. switch hdr.Typeflag {
  127. case TypeXHeader:
  128. extHdrs, tr.err = parsePAX(tr)
  129. if tr.err != nil {
  130. return nil, tr.err
  131. }
  132. continue loop // This is a meta header affecting the next header
  133. case TypeGNULongName, TypeGNULongLink:
  134. var realname []byte
  135. realname, tr.err = ioutil.ReadAll(tr)
  136. if tr.err != nil {
  137. return nil, tr.err
  138. }
  139. // Convert GNU extensions to use PAX headers.
  140. if extHdrs == nil {
  141. extHdrs = make(map[string]string)
  142. }
  143. var p parser
  144. switch hdr.Typeflag {
  145. case TypeGNULongName:
  146. extHdrs[paxPath] = p.parseString(realname)
  147. case TypeGNULongLink:
  148. extHdrs[paxLinkpath] = p.parseString(realname)
  149. }
  150. if p.err != nil {
  151. tr.err = p.err
  152. return nil, tr.err
  153. }
  154. continue loop // This is a meta header affecting the next header
  155. default:
  156. mergePAX(hdr, extHdrs)
  157. // Check for a PAX format sparse file
  158. sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
  159. if err != nil {
  160. tr.err = err
  161. return nil, err
  162. }
  163. if sp != nil {
  164. // Current file is a PAX format GNU sparse file.
  165. // Set the current file reader to a sparse file reader.
  166. tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
  167. if tr.err != nil {
  168. return nil, tr.err
  169. }
  170. }
  171. break loop // This is a file, so stop
  172. }
  173. }
  174. return hdr, nil
  175. }
  176. // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
  177. // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
  178. // be treated as a regular file.
  179. func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
  180. var sparseFormat string
  181. // Check for sparse format indicators
  182. major, majorOk := headers[paxGNUSparseMajor]
  183. minor, minorOk := headers[paxGNUSparseMinor]
  184. sparseName, sparseNameOk := headers[paxGNUSparseName]
  185. _, sparseMapOk := headers[paxGNUSparseMap]
  186. sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
  187. sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
  188. // Identify which, if any, sparse format applies from which PAX headers are set
  189. if majorOk && minorOk {
  190. sparseFormat = major + "." + minor
  191. } else if sparseNameOk && sparseMapOk {
  192. sparseFormat = "0.1"
  193. } else if sparseSizeOk {
  194. sparseFormat = "0.0"
  195. } else {
  196. // Not a PAX format GNU sparse file.
  197. return nil, nil
  198. }
  199. // Check for unknown sparse format
  200. if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
  201. return nil, nil
  202. }
  203. // Update hdr from GNU sparse PAX headers
  204. if sparseNameOk {
  205. hdr.Name = sparseName
  206. }
  207. if sparseSizeOk {
  208. realSize, err := strconv.ParseInt(sparseSize, 10, 0)
  209. if err != nil {
  210. return nil, ErrHeader
  211. }
  212. hdr.Size = realSize
  213. } else if sparseRealSizeOk {
  214. realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
  215. if err != nil {
  216. return nil, ErrHeader
  217. }
  218. hdr.Size = realSize
  219. }
  220. // Set up the sparse map, according to the particular sparse format in use
  221. var sp []sparseEntry
  222. var err error
  223. switch sparseFormat {
  224. case "0.0", "0.1":
  225. sp, err = readGNUSparseMap0x1(headers)
  226. case "1.0":
  227. sp, err = readGNUSparseMap1x0(tr.curr)
  228. }
  229. return sp, err
  230. }
  231. // mergePAX merges well known headers according to PAX standard.
  232. // In general headers with the same name as those found
  233. // in the header struct overwrite those found in the header
  234. // struct with higher precision or longer values. Esp. useful
  235. // for name and linkname fields.
  236. func mergePAX(hdr *Header, headers map[string]string) error {
  237. for k, v := range headers {
  238. switch k {
  239. case paxPath:
  240. hdr.Name = v
  241. case paxLinkpath:
  242. hdr.Linkname = v
  243. case paxGname:
  244. hdr.Gname = v
  245. case paxUname:
  246. hdr.Uname = v
  247. case paxUid:
  248. uid, err := strconv.ParseInt(v, 10, 0)
  249. if err != nil {
  250. return err
  251. }
  252. hdr.Uid = int(uid)
  253. case paxGid:
  254. gid, err := strconv.ParseInt(v, 10, 0)
  255. if err != nil {
  256. return err
  257. }
  258. hdr.Gid = int(gid)
  259. case paxAtime:
  260. t, err := parsePAXTime(v)
  261. if err != nil {
  262. return err
  263. }
  264. hdr.AccessTime = t
  265. case paxMtime:
  266. t, err := parsePAXTime(v)
  267. if err != nil {
  268. return err
  269. }
  270. hdr.ModTime = t
  271. case paxCtime:
  272. t, err := parsePAXTime(v)
  273. if err != nil {
  274. return err
  275. }
  276. hdr.ChangeTime = t
  277. case paxCreationTime:
  278. t, err := parsePAXTime(v)
  279. if err != nil {
  280. return err
  281. }
  282. hdr.CreationTime = t
  283. case paxSize:
  284. size, err := strconv.ParseInt(v, 10, 0)
  285. if err != nil {
  286. return err
  287. }
  288. hdr.Size = int64(size)
  289. default:
  290. if strings.HasPrefix(k, paxXattr) {
  291. if hdr.Xattrs == nil {
  292. hdr.Xattrs = make(map[string]string)
  293. }
  294. hdr.Xattrs[k[len(paxXattr):]] = v
  295. } else if strings.HasPrefix(k, paxWindows) {
  296. if hdr.Winheaders == nil {
  297. hdr.Winheaders = make(map[string]string)
  298. }
  299. hdr.Winheaders[k[len(paxWindows):]] = v
  300. }
  301. }
  302. }
  303. return nil
  304. }
  305. // parsePAXTime takes a string of the form %d.%d as described in
  306. // the PAX specification.
  307. func parsePAXTime(t string) (time.Time, error) {
  308. buf := []byte(t)
  309. pos := bytes.IndexByte(buf, '.')
  310. var seconds, nanoseconds int64
  311. var err error
  312. if pos == -1 {
  313. seconds, err = strconv.ParseInt(t, 10, 0)
  314. if err != nil {
  315. return time.Time{}, err
  316. }
  317. } else {
  318. seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
  319. if err != nil {
  320. return time.Time{}, err
  321. }
  322. nano_buf := string(buf[pos+1:])
  323. // Pad as needed before converting to a decimal.
  324. // For example .030 -> .030000000 -> 30000000 nanoseconds
  325. if len(nano_buf) < maxNanoSecondIntSize {
  326. // Right pad
  327. nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
  328. } else if len(nano_buf) > maxNanoSecondIntSize {
  329. // Right truncate
  330. nano_buf = nano_buf[:maxNanoSecondIntSize]
  331. }
  332. nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
  333. if err != nil {
  334. return time.Time{}, err
  335. }
  336. }
  337. ts := time.Unix(seconds, nanoseconds)
  338. return ts, nil
  339. }
  340. // parsePAX parses PAX headers.
  341. // If an extended header (type 'x') is invalid, ErrHeader is returned
  342. func parsePAX(r io.Reader) (map[string]string, error) {
  343. buf, err := ioutil.ReadAll(r)
  344. if err != nil {
  345. return nil, err
  346. }
  347. sbuf := string(buf)
  348. // For GNU PAX sparse format 0.0 support.
  349. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
  350. var sparseMap bytes.Buffer
  351. headers := make(map[string]string)
  352. // Each record is constructed as
  353. // "%d %s=%s\n", length, keyword, value
  354. for len(sbuf) > 0 {
  355. key, value, residual, err := parsePAXRecord(sbuf)
  356. if err != nil {
  357. return nil, ErrHeader
  358. }
  359. sbuf = residual
  360. keyStr := string(key)
  361. if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
  362. // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
  363. sparseMap.WriteString(value)
  364. sparseMap.Write([]byte{','})
  365. } else {
  366. // Normal key. Set the value in the headers map.
  367. headers[keyStr] = string(value)
  368. }
  369. }
  370. if sparseMap.Len() != 0 {
  371. // Add sparse info to headers, chopping off the extra comma
  372. sparseMap.Truncate(sparseMap.Len() - 1)
  373. headers[paxGNUSparseMap] = sparseMap.String()
  374. }
  375. return headers, nil
  376. }
  377. // parsePAXRecord parses the input PAX record string into a key-value pair.
  378. // If parsing is successful, it will slice off the currently read record and
  379. // return the remainder as r.
  380. //
  381. // A PAX record is of the following form:
  382. // "%d %s=%s\n" % (size, key, value)
  383. func parsePAXRecord(s string) (k, v, r string, err error) {
  384. // The size field ends at the first space.
  385. sp := strings.IndexByte(s, ' ')
  386. if sp == -1 {
  387. return "", "", s, ErrHeader
  388. }
  389. // Parse the first token as a decimal integer.
  390. n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
  391. if perr != nil || n < 5 || int64(len(s)) < n {
  392. return "", "", s, ErrHeader
  393. }
  394. // Extract everything between the space and the final newline.
  395. rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
  396. if nl != "\n" {
  397. return "", "", s, ErrHeader
  398. }
  399. // The first equals separates the key from the value.
  400. eq := strings.IndexByte(rec, '=')
  401. if eq == -1 {
  402. return "", "", s, ErrHeader
  403. }
  404. return rec[:eq], rec[eq+1:], rem, nil
  405. }
  406. // parseString parses bytes as a NUL-terminated C-style string.
  407. // If a NUL byte is not found then the whole slice is returned as a string.
  408. func (*parser) parseString(b []byte) string {
  409. n := 0
  410. for n < len(b) && b[n] != 0 {
  411. n++
  412. }
  413. return string(b[0:n])
  414. }
  415. // parseNumeric parses the input as being encoded in either base-256 or octal.
  416. // This function may return negative numbers.
  417. // If parsing fails or an integer overflow occurs, err will be set.
  418. func (p *parser) parseNumeric(b []byte) int64 {
  419. // Check for base-256 (binary) format first.
  420. // If the first bit is set, then all following bits constitute a two's
  421. // complement encoded number in big-endian byte order.
  422. if len(b) > 0 && b[0]&0x80 != 0 {
  423. // Handling negative numbers relies on the following identity:
  424. // -a-1 == ^a
  425. //
  426. // If the number is negative, we use an inversion mask to invert the
  427. // data bytes and treat the value as an unsigned number.
  428. var inv byte // 0x00 if positive or zero, 0xff if negative
  429. if b[0]&0x40 != 0 {
  430. inv = 0xff
  431. }
  432. var x uint64
  433. for i, c := range b {
  434. c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
  435. if i == 0 {
  436. c &= 0x7f // Ignore signal bit in first byte
  437. }
  438. if (x >> 56) > 0 {
  439. p.err = ErrHeader // Integer overflow
  440. return 0
  441. }
  442. x = x<<8 | uint64(c)
  443. }
  444. if (x >> 63) > 0 {
  445. p.err = ErrHeader // Integer overflow
  446. return 0
  447. }
  448. if inv == 0xff {
  449. return ^int64(x)
  450. }
  451. return int64(x)
  452. }
  453. // Normal case is base-8 (octal) format.
  454. return p.parseOctal(b)
  455. }
  456. func (p *parser) parseOctal(b []byte) int64 {
  457. // Because unused fields are filled with NULs, we need
  458. // to skip leading NULs. Fields may also be padded with
  459. // spaces or NULs.
  460. // So we remove leading and trailing NULs and spaces to
  461. // be sure.
  462. b = bytes.Trim(b, " \x00")
  463. if len(b) == 0 {
  464. return 0
  465. }
  466. x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
  467. if perr != nil {
  468. p.err = ErrHeader
  469. }
  470. return int64(x)
  471. }
  472. // skipUnread skips any unread bytes in the existing file entry, as well as any
  473. // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
  474. // encountered in the data portion; it is okay to hit io.EOF in the padding.
  475. //
  476. // Note that this function still works properly even when sparse files are being
  477. // used since numBytes returns the bytes remaining in the underlying io.Reader.
  478. func (tr *Reader) skipUnread() error {
  479. dataSkip := tr.numBytes() // Number of data bytes to skip
  480. totalSkip := dataSkip + tr.pad // Total number of bytes to skip
  481. tr.curr, tr.pad = nil, 0
  482. // If possible, Seek to the last byte before the end of the data section.
  483. // Do this because Seek is often lazy about reporting errors; this will mask
  484. // the fact that the tar stream may be truncated. We can rely on the
  485. // io.CopyN done shortly afterwards to trigger any IO errors.
  486. var seekSkipped int64 // Number of bytes skipped via Seek
  487. if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
  488. // Not all io.Seeker can actually Seek. For example, os.Stdin implements
  489. // io.Seeker, but calling Seek always returns an error and performs
  490. // no action. Thus, we try an innocent seek to the current position
  491. // to see if Seek is really supported.
  492. pos1, err := sr.Seek(0, os.SEEK_CUR)
  493. if err == nil {
  494. // Seek seems supported, so perform the real Seek.
  495. pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
  496. if err != nil {
  497. tr.err = err
  498. return tr.err
  499. }
  500. seekSkipped = pos2 - pos1
  501. }
  502. }
  503. var copySkipped int64 // Number of bytes skipped via CopyN
  504. copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
  505. if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
  506. tr.err = io.ErrUnexpectedEOF
  507. }
  508. return tr.err
  509. }
  510. func (tr *Reader) verifyChecksum(header []byte) bool {
  511. if tr.err != nil {
  512. return false
  513. }
  514. var p parser
  515. given := p.parseOctal(header[148:156])
  516. unsigned, signed := checksum(header)
  517. return p.err == nil && (given == unsigned || given == signed)
  518. }
  519. // readHeader reads the next block header and assumes that the underlying reader
  520. // is already aligned to a block boundary.
  //
  // On success it returns the parsed Header and sets tr.curr and tr.pad for the
  // entry's data. On any failure it returns nil and leaves the reason in tr.err.
  521. //
  522. // The err will be set to io.EOF only when one of the following occurs:
  523. // * Exactly 0 bytes are read and EOF is hit.
  524. // * Exactly 1 block of zeros is read and EOF is hit.
  525. // * At least 2 blocks of zeros are read.
  526. func (tr *Reader) readHeader() *Header {
  527. header := tr.hdrBuff[:] // reuse the Reader's scratch block instead of allocating
  528. copy(header, zeroBlock) // overwrite the previous header's contents with zeroBlock before reading
  529. if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
  530. return nil // io.EOF is okay here
  531. }
  532. // Two blocks of zero bytes marks the end of the archive.
  533. if bytes.Equal(header, zeroBlock[0:blockSize]) {
  534. if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
  535. return nil // io.EOF is okay here
  536. }
  537. if bytes.Equal(header, zeroBlock[0:blockSize]) {
  538. tr.err = io.EOF
  539. } else {
  540. tr.err = ErrHeader // zero block and then non-zero block
  541. }
  542. return nil
  543. }
  544. if !tr.verifyChecksum(header) {
  545. tr.err = ErrHeader
  546. return nil
  547. }
  548. // Unpack
  // Parse failures accumulate in p.err and are checked once, after the
  // format-specific fields below.
  549. var p parser
  550. hdr := new(Header)
  // NOTE(review): slicer is defined outside this file; each s.next(n)
  // presumably returns the next n header bytes and advances — confirm.
  // The order of the s.next calls below therefore must not change.
  551. s := slicer(header)
  552. hdr.Name = p.parseString(s.next(100))
  553. hdr.Mode = p.parseNumeric(s.next(8))
  554. hdr.Uid = int(p.parseNumeric(s.next(8)))
  555. hdr.Gid = int(p.parseNumeric(s.next(8)))
  556. hdr.Size = p.parseNumeric(s.next(12))
  557. hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
  558. s.next(8) // chksum
  559. hdr.Typeflag = s.next(1)[0]
  560. hdr.Linkname = p.parseString(s.next(100))
  561. // The remainder of the header depends on the value of magic.
  562. // The original (v7) version of tar had no explicit magic field,
  563. // so its magic bytes, like the rest of the block, are NULs.
  564. magic := string(s.next(8)) // contains version field as well.
  565. var format string // stays "" when neither magic matches; the extended fields are then skipped
  566. switch {
  567. case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
  568. if string(header[508:512]) == "tar\x00" { // trailing "tar\x00" marks the star variant
  569. format = "star"
  570. } else {
  571. format = "posix"
  572. }
  573. case magic == "ustar \x00": // old GNU tar
  574. format = "gnu"
  575. }
  576. switch format {
  577. case "posix", "gnu", "star":
  578. hdr.Uname = p.parseString(s.next(32))
  579. hdr.Gname = p.parseString(s.next(32))
  580. devmajor := s.next(8) // always consumed so later fields stay aligned
  581. devminor := s.next(8)
  582. if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { // device numbers are parsed only for device entries
  583. hdr.Devmajor = p.parseNumeric(devmajor)
  584. hdr.Devminor = p.parseNumeric(devminor)
  585. }
  586. var prefix string
  587. switch format {
  588. case "posix", "gnu":
  589. prefix = p.parseString(s.next(155))
  590. case "star": // star: a 131-byte prefix followed by two 12-byte time fields
  591. prefix = p.parseString(s.next(131))
  592. hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
  593. hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
  594. }
  595. if len(prefix) > 0 {
  596. hdr.Name = prefix + "/" + hdr.Name
  597. }
  598. }
  599. if p.err != nil { // any field above failed to parse
  600. tr.err = p.err
  601. return nil
  602. }
  603. nb := hdr.Size // number of data bytes that follow this header
  604. if isHeaderOnlyType(hdr.Typeflag) {
  605. nb = 0 // such entries get no data bytes regardless of the recorded size
  606. }
  607. if nb < 0 {
  608. tr.err = ErrHeader
  609. return nil
  610. }
  611. // Set the current file reader.
  612. tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
  613. tr.curr = &regFileReader{r: tr.r, nb: nb}
  614. // Check for old GNU sparse format entry.
  615. if hdr.Typeflag == TypeGNUSparse {
  616. // Get the real size of the file.
  617. hdr.Size = p.parseNumeric(header[483:495])
  618. if p.err != nil {
  619. tr.err = p.err
  620. return nil
  621. }
  622. // Read the sparse map.
  623. sp := tr.readOldGNUSparseMap(header) // reports failure through tr.err
  624. if tr.err != nil {
  625. return nil
  626. }
  627. // Current file is a GNU sparse file. Update the current file reader.
  628. tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
  629. if tr.err != nil {
  630. return nil
  631. }
  632. }
  633. return hdr
  634. }
  635. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
  636. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
  637. // then one or more extension headers are used to store the rest of the sparse map.
  638. func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
  639. var p parser
  640. isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
  641. spCap := oldGNUSparseMainHeaderNumEntries
  642. if isExtended {
  643. spCap += oldGNUSparseExtendedHeaderNumEntries
  644. }
  645. sp := make([]sparseEntry, 0, spCap)
  646. s := slicer(header[oldGNUSparseMainHeaderOffset:])
  647. // Read the four entries from the main tar header
  648. for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
  649. offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
  650. numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
  651. if p.err != nil {
  652. tr.err = p.err
  653. return nil
  654. }
  655. if offset == 0 && numBytes == 0 {
  656. break
  657. }
  658. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  659. }
  660. for isExtended {
  661. // There are more entries. Read an extension header and parse its entries.
  662. sparseHeader := make([]byte, blockSize)
  663. if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
  664. return nil
  665. }
  666. isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
  667. s = slicer(sparseHeader)
  668. for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
  669. offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
  670. numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
  671. if p.err != nil {
  672. tr.err = p.err
  673. return nil
  674. }
  675. if offset == 0 && numBytes == 0 {
  676. break
  677. }
  678. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  679. }
  680. }
  681. return sp
  682. }
  683. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
  684. // version 1.0. The format of the sparse map consists of a series of
  685. // newline-terminated numeric fields. The first field is the number of entries
  686. // and is always present. Following this are the entries, consisting of two
  687. // fields (offset, numBytes). This function must stop reading at the end
  688. // boundary of the block containing the last newline.
  689. //
  690. // Note that the GNU manual says that numeric values should be encoded in octal
  691. // format. However, the GNU tar utility itself outputs these values in decimal.
  692. // As such, this library treats values as being encoded in decimal.
  693. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
  694. var cntNewline int64
  695. var buf bytes.Buffer
  696. var blk = make([]byte, blockSize)
  697. // feedTokens copies data in numBlock chunks from r into buf until there are
  698. // at least cnt newlines in buf. It will not read more blocks than needed.
  699. var feedTokens = func(cnt int64) error {
  700. for cntNewline < cnt {
  701. if _, err := io.ReadFull(r, blk); err != nil {
  702. if err == io.EOF {
  703. err = io.ErrUnexpectedEOF
  704. }
  705. return err
  706. }
  707. buf.Write(blk)
  708. for _, c := range blk {
  709. if c == '\n' {
  710. cntNewline++
  711. }
  712. }
  713. }
  714. return nil
  715. }
  716. // nextToken gets the next token delimited by a newline. This assumes that
  717. // at least one newline exists in the buffer.
  718. var nextToken = func() string {
  719. cntNewline--
  720. tok, _ := buf.ReadString('\n')
  721. return tok[:len(tok)-1] // Cut off newline
  722. }
  723. // Parse for the number of entries.
  724. // Use integer overflow resistant math to check this.
  725. if err := feedTokens(1); err != nil {
  726. return nil, err
  727. }
  728. numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
  729. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  730. return nil, ErrHeader
  731. }
  732. // Parse for all member entries.
  733. // numEntries is trusted after this since a potential attacker must have
  734. // committed resources proportional to what this library used.
  735. if err := feedTokens(2 * numEntries); err != nil {
  736. return nil, err
  737. }
  738. sp := make([]sparseEntry, 0, numEntries)
  739. for i := int64(0); i < numEntries; i++ {
  740. offset, err := strconv.ParseInt(nextToken(), 10, 64)
  741. if err != nil {
  742. return nil, ErrHeader
  743. }
  744. numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
  745. if err != nil {
  746. return nil, ErrHeader
  747. }
  748. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  749. }
  750. return sp, nil
  751. }
  752. // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
  753. // version 0.1. The sparse map is stored in the PAX headers.
  754. func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
  755. // Get number of entries.
  756. // Use integer overflow resistant math to check this.
  757. numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
  758. numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
  759. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  760. return nil, ErrHeader
  761. }
  762. // There should be two numbers in sparseMap for each entry.
  763. sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
  764. if int64(len(sparseMap)) != 2*numEntries {
  765. return nil, ErrHeader
  766. }
  767. // Loop through the entries in the sparse map.
  768. // numEntries is trusted now.
  769. sp := make([]sparseEntry, 0, numEntries)
  770. for i := int64(0); i < numEntries; i++ {
  771. offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
  772. if err != nil {
  773. return nil, ErrHeader
  774. }
  775. numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
  776. if err != nil {
  777. return nil, ErrHeader
  778. }
  779. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  780. }
  781. return sp, nil
  782. }
  783. // numBytes returns the number of bytes left to read in the current file's entry
  784. // in the tar archive, or 0 if there is no current file.
  785. func (tr *Reader) numBytes() int64 {
  786. if tr.curr == nil {
  787. // No current file, so no bytes
  788. return 0
  789. }
  790. return tr.curr.numBytes()
  791. }
  792. // Read reads from the current entry in the tar archive.
  793. // It returns 0, io.EOF when it reaches the end of that entry,
  794. // until Next is called to advance to the next entry.
  795. //
  796. // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
  797. // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
  798. // the Header.Size claims.
  799. func (tr *Reader) Read(b []byte) (n int, err error) {
  800. if tr.err != nil {
  801. return 0, tr.err
  802. }
  803. if tr.curr == nil {
  804. return 0, io.EOF
  805. }
  806. n, err = tr.curr.Read(b)
  807. if err != nil && err != io.EOF {
  808. tr.err = err
  809. }
  810. return
  811. }
  812. func (rfr *regFileReader) Read(b []byte) (n int, err error) {
  813. if rfr.nb == 0 {
  814. // file consumed
  815. return 0, io.EOF
  816. }
  817. if int64(len(b)) > rfr.nb {
  818. b = b[0:rfr.nb]
  819. }
  820. n, err = rfr.r.Read(b)
  821. rfr.nb -= int64(n)
  822. if err == io.EOF && rfr.nb > 0 {
  823. err = io.ErrUnexpectedEOF
  824. }
  825. return
  826. }
  827. // numBytes returns the number of bytes left to read in the file's data in the tar archive.
  828. func (rfr *regFileReader) numBytes() int64 {
  829. return rfr.nb
  830. }
  831. // newSparseFileReader creates a new sparseFileReader, but validates all of the
  832. // sparse entries before doing so.
  833. func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
  834. if total < 0 {
  835. return nil, ErrHeader // Total size cannot be negative
  836. }
  837. // Validate all sparse entries. These are the same checks as performed by
  838. // the BSD tar utility.
  839. for i, s := range sp {
  840. switch {
  841. case s.offset < 0 || s.numBytes < 0:
  842. return nil, ErrHeader // Negative values are never okay
  843. case s.offset > math.MaxInt64-s.numBytes:
  844. return nil, ErrHeader // Integer overflow with large length
  845. case s.offset+s.numBytes > total:
  846. return nil, ErrHeader // Region extends beyond the "real" size
  847. case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
  848. return nil, ErrHeader // Regions can't overlap and must be in order
  849. }
  850. }
  851. return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
  852. }
  853. // readHole reads a sparse hole ending at endOffset.
  854. func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
  855. n64 := endOffset - sfr.pos
  856. if n64 > int64(len(b)) {
  857. n64 = int64(len(b))
  858. }
  859. n := int(n64)
  860. for i := 0; i < n; i++ {
  861. b[i] = 0
  862. }
  863. sfr.pos += n64
  864. return n
  865. }
  866. // Read reads the sparse file data in expanded form.
  867. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
  868. // Skip past all empty fragments.
  869. for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
  870. sfr.sp = sfr.sp[1:]
  871. }
  872. // If there are no more fragments, then it is possible that there
  873. // is one last sparse hole.
  874. if len(sfr.sp) == 0 {
  875. // This behavior matches the BSD tar utility.
  876. // However, GNU tar stops returning data even if sfr.total is unmet.
  877. if sfr.pos < sfr.total {
  878. return sfr.readHole(b, sfr.total), nil
  879. }
  880. return 0, io.EOF
  881. }
  882. // In front of a data fragment, so read a hole.
  883. if sfr.pos < sfr.sp[0].offset {
  884. return sfr.readHole(b, sfr.sp[0].offset), nil
  885. }
  886. // In a data fragment, so read from it.
  887. // This math is overflow free since we verify that offset and numBytes can
  888. // be safely added when creating the sparseFileReader.
  889. endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
  890. bytesLeft := endPos - sfr.pos // Bytes left in fragment
  891. if int64(len(b)) > bytesLeft {
  892. b = b[:bytesLeft]
  893. }
  894. n, err = sfr.rfr.Read(b)
  895. sfr.pos += int64(n)
  896. if err == io.EOF {
  897. if sfr.pos < endPos {
  898. err = io.ErrUnexpectedEOF // There was supposed to be more data
  899. } else if sfr.pos < sfr.total {
  900. err = nil // There is still an implicit sparse hole at the end
  901. }
  902. }
  903. if sfr.pos == endPos {
  904. sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
  905. }
  906. return n, err
  907. }
  908. // numBytes returns the number of bytes left to read in the sparse file's
  909. // sparse-encoded data in the tar archive.
  910. func (sfr *sparseFileReader) numBytes() int64 {
  911. return sfr.rfr.numBytes()
  912. }