reader.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package tar
  5. // TODO(dsymonds):
  6. // - pax extensions
  7. import (
  8. "bytes"
  9. "errors"
  10. "io"
  11. "io/ioutil"
  12. "math"
  13. "os"
  14. "strconv"
  15. "strings"
  16. "time"
  17. )
var (
	// ErrHeader is the error reported when a tar header block (or one of the
	// extended header records that accompany it) cannot be parsed or fails
	// validation.
	ErrHeader = errors.New("archive/tar: invalid tar header")
)
// maxNanoSecondIntSize is the number of decimal digits parsePAXTime keeps from
// the fractional part of a PAX timestamp; shorter fractions are zero-padded
// and longer ones truncated to this width.
const maxNanoSecondIntSize = 9
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
	r       io.Reader       // underlying archive stream
	err     error           // sticky error: once set, Next keeps returning it
	pad     int64           // amount of padding (ignored) after current file entry
	curr    numBytesReader  // reader for current file entry
	hdrBuff [blockSize]byte // buffer to use in readHeader

	// RawAccounting enables recording of the non-payload bytes (headers,
	// padding, meta-entry contents) so the original tar stream can be
	// reassembled. Enabling it costs some performance and memory.
	RawAccounting bool
	rawBytes      *bytes.Buffer // raw bytes recorded since the last reset
}
// parser carries error state across a series of field-parsing calls, so a
// caller can parse several header fields in a row and check err once at the
// end instead of after every call.
type parser struct {
	err error // Last error seen
}
  38. // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
  39. // This includes the header and padding.
  40. //
  41. // This call resets the current rawbytes buffer
  42. //
  43. // Only when RawAccounting is enabled, otherwise this returns nil
  44. func (tr *Reader) RawBytes() []byte {
  45. if !tr.RawAccounting {
  46. return nil
  47. }
  48. if tr.rawBytes == nil {
  49. tr.rawBytes = bytes.NewBuffer(nil)
  50. }
  51. // if we've read them, then flush them.
  52. defer tr.rawBytes.Reset()
  53. return tr.rawBytes.Bytes()
  54. }
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
	io.Reader
	// numBytes reports how many bytes of the underlying encoded data are
	// still unread.
	numBytes() int64
}
// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // number of unread bytes for current file entry
}
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive.
type sparseFileReader struct {
	rfr   numBytesReader // Reads the sparse-encoded file data
	sp    []sparseEntry  // The sparse map for the file
	pos   int64          // Keeps track of file position
	total int64          // Total size of the file
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2,  numBytes: 5} // Data fragment for [2..7]
//		{offset: 18, numBytes: 3} // Data fragment for [18..21]
//	}
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset   int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
}
// Keywords for GNU sparse files in a PAX extended header.
const (
	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
	paxGNUSparseOffset    = "GNU.sparse.offset"   // format 0.0 key; parsePAX folds it into the sparse map
	paxGNUSparseNumBytes  = "GNU.sparse.numbytes" // format 0.0 key; parsePAX folds it into the sparse map
	paxGNUSparseMap       = "GNU.sparse.map"
	paxGNUSparseName      = "GNU.sparse.name"
	paxGNUSparseMajor     = "GNU.sparse.major"
	paxGNUSparseMinor     = "GNU.sparse.minor"
	paxGNUSparseSize      = "GNU.sparse.size"
	paxGNUSparseRealSize  = "GNU.sparse.realsize"
)
// Layout of the sparse map in old GNU sparse headers: byte offsets within a
// header block, the number of entries each kind of block holds, and the width
// in bytes of each numeric field.
const (
	oldGNUSparseMainHeaderOffset               = 386 // start of the sparse entries in the main header
	oldGNUSparseMainHeaderIsExtendedOffset     = 482 // non-zero byte here: extension headers follow
	oldGNUSparseMainHeaderNumEntries           = 4   // entries stored in the main header
	oldGNUSparseExtendedHeaderIsExtendedOffset = 504 // non-zero byte here: another extension header follows
	oldGNUSparseExtendedHeaderNumEntries       = 21  // entries stored in each extension header
	oldGNUSparseOffsetSize                     = 12  // width of an entry's offset field
	oldGNUSparseNumBytesSize                   = 12  // width of an entry's numBytes field
)
  118. // NewReader creates a new Reader reading from r.
  119. func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
  120. // Next advances to the next entry in the tar archive.
  121. //
  122. // io.EOF is returned at the end of the input.
  123. func (tr *Reader) Next() (*Header, error) {
  124. if tr.RawAccounting {
  125. if tr.rawBytes == nil {
  126. tr.rawBytes = bytes.NewBuffer(nil)
  127. } else {
  128. tr.rawBytes.Reset()
  129. }
  130. }
  131. if tr.err != nil {
  132. return nil, tr.err
  133. }
  134. var hdr *Header
  135. var extHdrs map[string]string
  136. // Externally, Next iterates through the tar archive as if it is a series of
  137. // files. Internally, the tar format often uses fake "files" to add meta
  138. // data that describes the next file. These meta data "files" should not
  139. // normally be visible to the outside. As such, this loop iterates through
  140. // one or more "header files" until it finds a "normal file".
  141. loop:
  142. for {
  143. tr.err = tr.skipUnread()
  144. if tr.err != nil {
  145. return nil, tr.err
  146. }
  147. hdr = tr.readHeader()
  148. if tr.err != nil {
  149. return nil, tr.err
  150. }
  151. // Check for PAX/GNU special headers and files.
  152. switch hdr.Typeflag {
  153. case TypeXHeader:
  154. extHdrs, tr.err = parsePAX(tr)
  155. if tr.err != nil {
  156. return nil, tr.err
  157. }
  158. continue loop // This is a meta header affecting the next header
  159. case TypeGNULongName, TypeGNULongLink:
  160. var realname []byte
  161. realname, tr.err = ioutil.ReadAll(tr)
  162. if tr.err != nil {
  163. return nil, tr.err
  164. }
  165. if tr.RawAccounting {
  166. if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
  167. return nil, tr.err
  168. }
  169. }
  170. // Convert GNU extensions to use PAX headers.
  171. if extHdrs == nil {
  172. extHdrs = make(map[string]string)
  173. }
  174. var p parser
  175. switch hdr.Typeflag {
  176. case TypeGNULongName:
  177. extHdrs[paxPath] = p.parseString(realname)
  178. case TypeGNULongLink:
  179. extHdrs[paxLinkpath] = p.parseString(realname)
  180. }
  181. if p.err != nil {
  182. tr.err = p.err
  183. return nil, tr.err
  184. }
  185. continue loop // This is a meta header affecting the next header
  186. default:
  187. mergePAX(hdr, extHdrs)
  188. // Check for a PAX format sparse file
  189. sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
  190. if err != nil {
  191. tr.err = err
  192. return nil, err
  193. }
  194. if sp != nil {
  195. // Current file is a PAX format GNU sparse file.
  196. // Set the current file reader to a sparse file reader.
  197. tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
  198. if tr.err != nil {
  199. return nil, tr.err
  200. }
  201. }
  202. break loop // This is a file, so stop
  203. }
  204. }
  205. return hdr, nil
  206. }
  207. // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
  208. // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
  209. // be treated as a regular file.
  210. func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
  211. var sparseFormat string
  212. // Check for sparse format indicators
  213. major, majorOk := headers[paxGNUSparseMajor]
  214. minor, minorOk := headers[paxGNUSparseMinor]
  215. sparseName, sparseNameOk := headers[paxGNUSparseName]
  216. _, sparseMapOk := headers[paxGNUSparseMap]
  217. sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
  218. sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
  219. // Identify which, if any, sparse format applies from which PAX headers are set
  220. if majorOk && minorOk {
  221. sparseFormat = major + "." + minor
  222. } else if sparseNameOk && sparseMapOk {
  223. sparseFormat = "0.1"
  224. } else if sparseSizeOk {
  225. sparseFormat = "0.0"
  226. } else {
  227. // Not a PAX format GNU sparse file.
  228. return nil, nil
  229. }
  230. // Check for unknown sparse format
  231. if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
  232. return nil, nil
  233. }
  234. // Update hdr from GNU sparse PAX headers
  235. if sparseNameOk {
  236. hdr.Name = sparseName
  237. }
  238. if sparseSizeOk {
  239. realSize, err := strconv.ParseInt(sparseSize, 10, 0)
  240. if err != nil {
  241. return nil, ErrHeader
  242. }
  243. hdr.Size = realSize
  244. } else if sparseRealSizeOk {
  245. realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
  246. if err != nil {
  247. return nil, ErrHeader
  248. }
  249. hdr.Size = realSize
  250. }
  251. // Set up the sparse map, according to the particular sparse format in use
  252. var sp []sparseEntry
  253. var err error
  254. switch sparseFormat {
  255. case "0.0", "0.1":
  256. sp, err = readGNUSparseMap0x1(headers)
  257. case "1.0":
  258. sp, err = readGNUSparseMap1x0(tr.curr)
  259. }
  260. return sp, err
  261. }
  262. // mergePAX merges well known headers according to PAX standard.
  263. // In general headers with the same name as those found
  264. // in the header struct overwrite those found in the header
  265. // struct with higher precision or longer values. Esp. useful
  266. // for name and linkname fields.
  267. func mergePAX(hdr *Header, headers map[string]string) error {
  268. for k, v := range headers {
  269. switch k {
  270. case paxPath:
  271. hdr.Name = v
  272. case paxLinkpath:
  273. hdr.Linkname = v
  274. case paxGname:
  275. hdr.Gname = v
  276. case paxUname:
  277. hdr.Uname = v
  278. case paxUid:
  279. uid, err := strconv.ParseInt(v, 10, 0)
  280. if err != nil {
  281. return err
  282. }
  283. hdr.Uid = int(uid)
  284. case paxGid:
  285. gid, err := strconv.ParseInt(v, 10, 0)
  286. if err != nil {
  287. return err
  288. }
  289. hdr.Gid = int(gid)
  290. case paxAtime:
  291. t, err := parsePAXTime(v)
  292. if err != nil {
  293. return err
  294. }
  295. hdr.AccessTime = t
  296. case paxMtime:
  297. t, err := parsePAXTime(v)
  298. if err != nil {
  299. return err
  300. }
  301. hdr.ModTime = t
  302. case paxCtime:
  303. t, err := parsePAXTime(v)
  304. if err != nil {
  305. return err
  306. }
  307. hdr.ChangeTime = t
  308. case paxSize:
  309. size, err := strconv.ParseInt(v, 10, 0)
  310. if err != nil {
  311. return err
  312. }
  313. hdr.Size = int64(size)
  314. default:
  315. if strings.HasPrefix(k, paxXattr) {
  316. if hdr.Xattrs == nil {
  317. hdr.Xattrs = make(map[string]string)
  318. }
  319. hdr.Xattrs[k[len(paxXattr):]] = v
  320. }
  321. }
  322. }
  323. return nil
  324. }
  325. // parsePAXTime takes a string of the form %d.%d as described in
  326. // the PAX specification.
  327. func parsePAXTime(t string) (time.Time, error) {
  328. buf := []byte(t)
  329. pos := bytes.IndexByte(buf, '.')
  330. var seconds, nanoseconds int64
  331. var err error
  332. if pos == -1 {
  333. seconds, err = strconv.ParseInt(t, 10, 0)
  334. if err != nil {
  335. return time.Time{}, err
  336. }
  337. } else {
  338. seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
  339. if err != nil {
  340. return time.Time{}, err
  341. }
  342. nano_buf := string(buf[pos+1:])
  343. // Pad as needed before converting to a decimal.
  344. // For example .030 -> .030000000 -> 30000000 nanoseconds
  345. if len(nano_buf) < maxNanoSecondIntSize {
  346. // Right pad
  347. nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
  348. } else if len(nano_buf) > maxNanoSecondIntSize {
  349. // Right truncate
  350. nano_buf = nano_buf[:maxNanoSecondIntSize]
  351. }
  352. nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
  353. if err != nil {
  354. return time.Time{}, err
  355. }
  356. }
  357. ts := time.Unix(seconds, nanoseconds)
  358. return ts, nil
  359. }
  360. // parsePAX parses PAX headers.
  361. // If an extended header (type 'x') is invalid, ErrHeader is returned
  362. func parsePAX(r io.Reader) (map[string]string, error) {
  363. buf, err := ioutil.ReadAll(r)
  364. if err != nil {
  365. return nil, err
  366. }
  367. // leaving this function for io.Reader makes it more testable
  368. if tr, ok := r.(*Reader); ok && tr.RawAccounting {
  369. if _, err = tr.rawBytes.Write(buf); err != nil {
  370. return nil, err
  371. }
  372. }
  373. sbuf := string(buf)
  374. // For GNU PAX sparse format 0.0 support.
  375. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
  376. var sparseMap bytes.Buffer
  377. headers := make(map[string]string)
  378. // Each record is constructed as
  379. // "%d %s=%s\n", length, keyword, value
  380. for len(sbuf) > 0 {
  381. key, value, residual, err := parsePAXRecord(sbuf)
  382. if err != nil {
  383. return nil, ErrHeader
  384. }
  385. sbuf = residual
  386. keyStr := string(key)
  387. if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
  388. // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
  389. sparseMap.WriteString(value)
  390. sparseMap.Write([]byte{','})
  391. } else {
  392. // Normal key. Set the value in the headers map.
  393. headers[keyStr] = string(value)
  394. }
  395. }
  396. if sparseMap.Len() != 0 {
  397. // Add sparse info to headers, chopping off the extra comma
  398. sparseMap.Truncate(sparseMap.Len() - 1)
  399. headers[paxGNUSparseMap] = sparseMap.String()
  400. }
  401. return headers, nil
  402. }
  403. // parsePAXRecord parses the input PAX record string into a key-value pair.
  404. // If parsing is successful, it will slice off the currently read record and
  405. // return the remainder as r.
  406. //
  407. // A PAX record is of the following form:
  408. // "%d %s=%s\n" % (size, key, value)
  409. func parsePAXRecord(s string) (k, v, r string, err error) {
  410. // The size field ends at the first space.
  411. sp := strings.IndexByte(s, ' ')
  412. if sp == -1 {
  413. return "", "", s, ErrHeader
  414. }
  415. // Parse the first token as a decimal integer.
  416. n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
  417. if perr != nil || n < 5 || int64(len(s)) < n {
  418. return "", "", s, ErrHeader
  419. }
  420. // Extract everything between the space and the final newline.
  421. rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
  422. if nl != "\n" {
  423. return "", "", s, ErrHeader
  424. }
  425. // The first equals separates the key from the value.
  426. eq := strings.IndexByte(rec, '=')
  427. if eq == -1 {
  428. return "", "", s, ErrHeader
  429. }
  430. return rec[:eq], rec[eq+1:], rem, nil
  431. }
  432. // parseString parses bytes as a NUL-terminated C-style string.
  433. // If a NUL byte is not found then the whole slice is returned as a string.
  434. func (*parser) parseString(b []byte) string {
  435. n := 0
  436. for n < len(b) && b[n] != 0 {
  437. n++
  438. }
  439. return string(b[0:n])
  440. }
// parseNumeric parses the input as being encoded in either base-256 or octal.
// This function may return negative numbers.
// If parsing fails or an integer overflow occurs, p.err will be set and the
// base-256 path returns 0.
func (p *parser) parseNumeric(b []byte) int64 {
	// Check for base-256 (binary) format first.
	// If the first bit is set, then all following bits constitute a two's
	// complement encoded number in big-endian byte order.
	if len(b) > 0 && b[0]&0x80 != 0 {
		// Handling negative numbers relies on the following identity:
		//	-a-1 == ^a
		//
		// If the number is negative, we use an inversion mask to invert the
		// data bytes and treat the value as an unsigned number.
		var inv byte // 0x00 if positive or zero, 0xff if negative
		if b[0]&0x40 != 0 {
			inv = 0xff
		}

		var x uint64
		for i, c := range b {
			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
			if i == 0 {
				c &= 0x7f // Ignore sign bit in first byte
			}
			// The top byte is already occupied, so shifting in another
			// byte would lose data.
			if (x >> 56) > 0 {
				p.err = ErrHeader // Integer overflow
				return 0
			}
			x = x<<8 | uint64(c)
		}
		// The magnitude must fit in 63 bits to be representable as an int64.
		if (x >> 63) > 0 {
			p.err = ErrHeader // Integer overflow
			return 0
		}
		if inv == 0xff {
			return ^int64(x)
		}
		return int64(x)
	}

	// Normal case is base-8 (octal) format.
	return p.parseOctal(b)
}
  482. func (p *parser) parseOctal(b []byte) int64 {
  483. // Because unused fields are filled with NULs, we need
  484. // to skip leading NULs. Fields may also be padded with
  485. // spaces or NULs.
  486. // So we remove leading and trailing NULs and spaces to
  487. // be sure.
  488. b = bytes.Trim(b, " \x00")
  489. if len(b) == 0 {
  490. return 0
  491. }
  492. x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
  493. if perr != nil {
  494. p.err = ErrHeader
  495. }
  496. return int64(x)
  497. }
  498. // skipUnread skips any unread bytes in the existing file entry, as well as any
  499. // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
  500. // encountered in the data portion; it is okay to hit io.EOF in the padding.
  501. //
  502. // Note that this function still works properly even when sparse files are being
  503. // used since numBytes returns the bytes remaining in the underlying io.Reader.
  504. func (tr *Reader) skipUnread() error {
  505. dataSkip := tr.numBytes() // Number of data bytes to skip
  506. totalSkip := dataSkip + tr.pad // Total number of bytes to skip
  507. tr.curr, tr.pad = nil, 0
  508. if tr.RawAccounting {
  509. _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
  510. return tr.err
  511. }
  512. // If possible, Seek to the last byte before the end of the data section.
  513. // Do this because Seek is often lazy about reporting errors; this will mask
  514. // the fact that the tar stream may be truncated. We can rely on the
  515. // io.CopyN done shortly afterwards to trigger any IO errors.
  516. var seekSkipped int64 // Number of bytes skipped via Seek
  517. if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
  518. // Not all io.Seeker can actually Seek. For example, os.Stdin implements
  519. // io.Seeker, but calling Seek always returns an error and performs
  520. // no action. Thus, we try an innocent seek to the current position
  521. // to see if Seek is really supported.
  522. pos1, err := sr.Seek(0, os.SEEK_CUR)
  523. if err == nil {
  524. // Seek seems supported, so perform the real Seek.
  525. pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
  526. if err != nil {
  527. tr.err = err
  528. return tr.err
  529. }
  530. seekSkipped = pos2 - pos1
  531. }
  532. }
  533. var copySkipped int64 // Number of bytes skipped via CopyN
  534. copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
  535. if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
  536. tr.err = io.ErrUnexpectedEOF
  537. }
  538. return tr.err
  539. }
  540. func (tr *Reader) verifyChecksum(header []byte) bool {
  541. if tr.err != nil {
  542. return false
  543. }
  544. var p parser
  545. given := p.parseOctal(header[148:156])
  546. unsigned, signed := checksum(header)
  547. return p.err == nil && (given == unsigned || given == signed)
  548. }
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// On success it returns the parsed Header and installs tr.curr and tr.pad for
// the entry's data. On failure it returns nil and records the reason in
// tr.err. When RawAccounting is enabled, every fully read block is appended
// to tr.rawBytes.
//
// The err will be set to io.EOF only when one of the following occurs:
//	* Exactly 0 bytes are read and EOF is hit.
//	* Exactly 1 block of zeros is read and EOF is hit.
//	* At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header {
	header := tr.hdrBuff[:]
	copy(header, zeroBlock)

	if n, err := io.ReadFull(tr.r, header); err != nil {
		tr.err = err
		// because it could read some of the block, but reach EOF first
		if tr.err == io.EOF && tr.RawAccounting {
			if _, err := tr.rawBytes.Write(header[:n]); err != nil {
				tr.err = err
			}
		}
		return nil // io.EOF is okay here
	}
	if tr.RawAccounting {
		if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
			return nil
		}
	}

	// Two blocks of zero bytes marks the end of the archive.
	if bytes.Equal(header, zeroBlock[0:blockSize]) {
		if n, err := io.ReadFull(tr.r, header); err != nil {
			tr.err = err
			// because it could read some of the block, but reach EOF first
			if tr.err == io.EOF && tr.RawAccounting {
				if _, err := tr.rawBytes.Write(header[:n]); err != nil {
					tr.err = err
				}
			}
			return nil // io.EOF is okay here
		}
		if tr.RawAccounting {
			if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
				return nil
			}
		}
		if bytes.Equal(header, zeroBlock[0:blockSize]) {
			tr.err = io.EOF
		} else {
			tr.err = ErrHeader // zero block and then non-zero block
		}
		return nil
	}

	if !tr.verifyChecksum(header) {
		tr.err = ErrHeader
		return nil
	}

	// Unpack
	// The fields are consumed strictly in on-disk order; each s.next call
	// advances past the field it returns.
	var p parser
	hdr := new(Header)
	s := slicer(header)

	hdr.Name = p.parseString(s.next(100))
	hdr.Mode = p.parseNumeric(s.next(8))
	hdr.Uid = int(p.parseNumeric(s.next(8)))
	hdr.Gid = int(p.parseNumeric(s.next(8)))
	hdr.Size = p.parseNumeric(s.next(12))
	hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
	s.next(8) // chksum
	hdr.Typeflag = s.next(1)[0]
	hdr.Linkname = p.parseString(s.next(100))

	// The remainder of the header depends on the value of magic.
	// The original (v7) version of tar had no explicit magic field,
	// so its magic bytes, like the rest of the block, are NULs.
	magic := string(s.next(8)) // contains version field as well.
	var format string
	switch {
	case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
		if string(header[508:512]) == "tar\x00" {
			format = "star"
		} else {
			format = "posix"
		}
	case magic == "ustar  \x00": // old GNU tar
		format = "gnu"
	}

	switch format {
	case "posix", "gnu", "star":
		hdr.Uname = p.parseString(s.next(32))
		hdr.Gname = p.parseString(s.next(32))
		devmajor := s.next(8)
		devminor := s.next(8)
		// Device numbers are only parsed for character and block devices.
		if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
			hdr.Devmajor = p.parseNumeric(devmajor)
			hdr.Devminor = p.parseNumeric(devminor)
		}
		var prefix string
		switch format {
		case "posix", "gnu":
			prefix = p.parseString(s.next(155))
		case "star":
			prefix = p.parseString(s.next(131))
			hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}

	if p.err != nil {
		tr.err = p.err
		return nil
	}

	// nb is the number of data bytes that follow the header in the archive.
	nb := hdr.Size
	if isHeaderOnlyType(hdr.Typeflag) {
		nb = 0
	}
	if nb < 0 {
		tr.err = ErrHeader
		return nil
	}

	// Set the current file reader.
	tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
	tr.curr = &regFileReader{r: tr.r, nb: nb}

	// Check for old GNU sparse format entry.
	if hdr.Typeflag == TypeGNUSparse {
		// Get the real size of the file.
		hdr.Size = p.parseNumeric(header[483:495])
		if p.err != nil {
			tr.err = p.err
			return nil
		}

		// Read the sparse map.
		sp := tr.readOldGNUSparseMap(header)
		if tr.err != nil {
			return nil
		}

		// Current file is a GNU sparse file. Update the current file reader.
		tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
		if tr.err != nil {
			return nil
		}
	}

	return hdr
}
  689. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
  690. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
  691. // then one or more extension headers are used to store the rest of the sparse map.
  692. func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
  693. var p parser
  694. isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
  695. spCap := oldGNUSparseMainHeaderNumEntries
  696. if isExtended {
  697. spCap += oldGNUSparseExtendedHeaderNumEntries
  698. }
  699. sp := make([]sparseEntry, 0, spCap)
  700. s := slicer(header[oldGNUSparseMainHeaderOffset:])
  701. // Read the four entries from the main tar header
  702. for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
  703. offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
  704. numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
  705. if p.err != nil {
  706. tr.err = p.err
  707. return nil
  708. }
  709. if offset == 0 && numBytes == 0 {
  710. break
  711. }
  712. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  713. }
  714. for isExtended {
  715. // There are more entries. Read an extension header and parse its entries.
  716. sparseHeader := make([]byte, blockSize)
  717. if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
  718. return nil
  719. }
  720. if tr.RawAccounting {
  721. if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
  722. return nil
  723. }
  724. }
  725. isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
  726. s = slicer(sparseHeader)
  727. for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
  728. offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
  729. numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
  730. if p.err != nil {
  731. tr.err = p.err
  732. return nil
  733. }
  734. if offset == 0 && numBytes == 0 {
  735. break
  736. }
  737. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  738. }
  739. }
  740. return sp
  741. }
  742. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
  743. // version 1.0. The format of the sparse map consists of a series of
  744. // newline-terminated numeric fields. The first field is the number of entries
  745. // and is always present. Following this are the entries, consisting of two
  746. // fields (offset, numBytes). This function must stop reading at the end
  747. // boundary of the block containing the last newline.
  748. //
  749. // Note that the GNU manual says that numeric values should be encoded in octal
  750. // format. However, the GNU tar utility itself outputs these values in decimal.
  751. // As such, this library treats values as being encoded in decimal.
  752. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
  753. var cntNewline int64
  754. var buf bytes.Buffer
  755. var blk = make([]byte, blockSize)
  756. // feedTokens copies data in numBlock chunks from r into buf until there are
  757. // at least cnt newlines in buf. It will not read more blocks than needed.
  758. var feedTokens = func(cnt int64) error {
  759. for cntNewline < cnt {
  760. if _, err := io.ReadFull(r, blk); err != nil {
  761. if err == io.EOF {
  762. err = io.ErrUnexpectedEOF
  763. }
  764. return err
  765. }
  766. buf.Write(blk)
  767. for _, c := range blk {
  768. if c == '\n' {
  769. cntNewline++
  770. }
  771. }
  772. }
  773. return nil
  774. }
  775. // nextToken gets the next token delimited by a newline. This assumes that
  776. // at least one newline exists in the buffer.
  777. var nextToken = func() string {
  778. cntNewline--
  779. tok, _ := buf.ReadString('\n')
  780. return tok[:len(tok)-1] // Cut off newline
  781. }
  782. // Parse for the number of entries.
  783. // Use integer overflow resistant math to check this.
  784. if err := feedTokens(1); err != nil {
  785. return nil, err
  786. }
  787. numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
  788. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  789. return nil, ErrHeader
  790. }
  791. // Parse for all member entries.
  792. // numEntries is trusted after this since a potential attacker must have
  793. // committed resources proportional to what this library used.
  794. if err := feedTokens(2 * numEntries); err != nil {
  795. return nil, err
  796. }
  797. sp := make([]sparseEntry, 0, numEntries)
  798. for i := int64(0); i < numEntries; i++ {
  799. offset, err := strconv.ParseInt(nextToken(), 10, 64)
  800. if err != nil {
  801. return nil, ErrHeader
  802. }
  803. numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
  804. if err != nil {
  805. return nil, ErrHeader
  806. }
  807. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  808. }
  809. return sp, nil
  810. }
  811. // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
  812. // version 0.1. The sparse map is stored in the PAX headers.
  813. func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
  814. // Get number of entries.
  815. // Use integer overflow resistant math to check this.
  816. numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
  817. numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
  818. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  819. return nil, ErrHeader
  820. }
  821. // There should be two numbers in sparseMap for each entry.
  822. sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
  823. if int64(len(sparseMap)) != 2*numEntries {
  824. return nil, ErrHeader
  825. }
  826. // Loop through the entries in the sparse map.
  827. // numEntries is trusted now.
  828. sp := make([]sparseEntry, 0, numEntries)
  829. for i := int64(0); i < numEntries; i++ {
  830. offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
  831. if err != nil {
  832. return nil, ErrHeader
  833. }
  834. numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
  835. if err != nil {
  836. return nil, ErrHeader
  837. }
  838. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  839. }
  840. return sp, nil
  841. }
  842. // numBytes returns the number of bytes left to read in the current file's entry
  843. // in the tar archive, or 0 if there is no current file.
  844. func (tr *Reader) numBytes() int64 {
  845. if tr.curr == nil {
  846. // No current file, so no bytes
  847. return 0
  848. }
  849. return tr.curr.numBytes()
  850. }
  851. // Read reads from the current entry in the tar archive.
  852. // It returns 0, io.EOF when it reaches the end of that entry,
  853. // until Next is called to advance to the next entry.
  854. //
  855. // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
  856. // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
  857. // the Header.Size claims.
  858. func (tr *Reader) Read(b []byte) (n int, err error) {
  859. if tr.err != nil {
  860. return 0, tr.err
  861. }
  862. if tr.curr == nil {
  863. return 0, io.EOF
  864. }
  865. n, err = tr.curr.Read(b)
  866. if err != nil && err != io.EOF {
  867. tr.err = err
  868. }
  869. return
  870. }
  871. func (rfr *regFileReader) Read(b []byte) (n int, err error) {
  872. if rfr.nb == 0 {
  873. // file consumed
  874. return 0, io.EOF
  875. }
  876. if int64(len(b)) > rfr.nb {
  877. b = b[0:rfr.nb]
  878. }
  879. n, err = rfr.r.Read(b)
  880. rfr.nb -= int64(n)
  881. if err == io.EOF && rfr.nb > 0 {
  882. err = io.ErrUnexpectedEOF
  883. }
  884. return
  885. }
  886. // numBytes returns the number of bytes left to read in the file's data in the tar archive.
  887. func (rfr *regFileReader) numBytes() int64 {
  888. return rfr.nb
  889. }
  890. // newSparseFileReader creates a new sparseFileReader, but validates all of the
  891. // sparse entries before doing so.
  892. func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
  893. if total < 0 {
  894. return nil, ErrHeader // Total size cannot be negative
  895. }
  896. // Validate all sparse entries. These are the same checks as performed by
  897. // the BSD tar utility.
  898. for i, s := range sp {
  899. switch {
  900. case s.offset < 0 || s.numBytes < 0:
  901. return nil, ErrHeader // Negative values are never okay
  902. case s.offset > math.MaxInt64-s.numBytes:
  903. return nil, ErrHeader // Integer overflow with large length
  904. case s.offset+s.numBytes > total:
  905. return nil, ErrHeader // Region extends beyond the "real" size
  906. case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
  907. return nil, ErrHeader // Regions can't overlap and must be in order
  908. }
  909. }
  910. return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
  911. }
  912. // readHole reads a sparse hole ending at endOffset.
  913. func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
  914. n64 := endOffset - sfr.pos
  915. if n64 > int64(len(b)) {
  916. n64 = int64(len(b))
  917. }
  918. n := int(n64)
  919. for i := 0; i < n; i++ {
  920. b[i] = 0
  921. }
  922. sfr.pos += n64
  923. return n
  924. }
  925. // Read reads the sparse file data in expanded form.
  926. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
  927. // Skip past all empty fragments.
  928. for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
  929. sfr.sp = sfr.sp[1:]
  930. }
  931. // If there are no more fragments, then it is possible that there
  932. // is one last sparse hole.
  933. if len(sfr.sp) == 0 {
  934. // This behavior matches the BSD tar utility.
  935. // However, GNU tar stops returning data even if sfr.total is unmet.
  936. if sfr.pos < sfr.total {
  937. return sfr.readHole(b, sfr.total), nil
  938. }
  939. return 0, io.EOF
  940. }
  941. // In front of a data fragment, so read a hole.
  942. if sfr.pos < sfr.sp[0].offset {
  943. return sfr.readHole(b, sfr.sp[0].offset), nil
  944. }
  945. // In a data fragment, so read from it.
  946. // This math is overflow free since we verify that offset and numBytes can
  947. // be safely added when creating the sparseFileReader.
  948. endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
  949. bytesLeft := endPos - sfr.pos // Bytes left in fragment
  950. if int64(len(b)) > bytesLeft {
  951. b = b[:bytesLeft]
  952. }
  953. n, err = sfr.rfr.Read(b)
  954. sfr.pos += int64(n)
  955. if err == io.EOF {
  956. if sfr.pos < endPos {
  957. err = io.ErrUnexpectedEOF // There was supposed to be more data
  958. } else if sfr.pos < sfr.total {
  959. err = nil // There is still an implicit sparse hole at the end
  960. }
  961. }
  962. if sfr.pos == endPos {
  963. sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
  964. }
  965. return n, err
  966. }
  967. // numBytes returns the number of bytes left to read in the sparse file's
  968. // sparse-encoded data in the tar archive.
  969. func (sfr *sparseFileReader) numBytes() int64 {
  970. return sfr.rfr.numBytes()
  971. }