reader.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925
  // Copyright 2009 The Go Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style
  // license that can be found in the LICENSE file.

  package tar

  import (
  	"bytes"
  	"io"
  	"io/ioutil"
  	"strconv"
  	"strings"
  	"time"
  )

  13. // Reader provides sequential access to the contents of a tar archive.
  14. // Reader.Next advances to the next file in the archive (including the first),
  15. // and then Reader can be treated as an io.Reader to access the file's data.
  16. type Reader struct {
  17. r io.Reader
  18. pad int64 // Amount of padding (ignored) after current file entry
  19. curr fileReader // Reader for current file entry
  20. blk block // Buffer to use as temporary local storage
  21. // err is a persistent error.
  22. // It is only the responsibility of every exported method of Reader to
  23. // ensure that this error is sticky.
  24. err error
  25. RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
  26. rawBytes *bytes.Buffer // last raw bits
  27. }
  28. type fileReader interface {
  29. io.Reader
  30. fileState
  31. WriteTo(io.Writer) (int64, error)
  32. }
  33. // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
  34. // This includes the header and padding.
  35. //
  36. // # This call resets the current rawbytes buffer
  37. //
  38. // Only when RawAccounting is enabled, otherwise this returns nil
  39. func (tr *Reader) RawBytes() []byte {
  40. if !tr.RawAccounting {
  41. return nil
  42. }
  43. if tr.rawBytes == nil {
  44. tr.rawBytes = bytes.NewBuffer(nil)
  45. }
  46. defer tr.rawBytes.Reset() // if we've read them, then flush them.
  47. return tr.rawBytes.Bytes()
  48. }
  49. // NewReader creates a new Reader reading from r.
  50. func NewReader(r io.Reader) *Reader {
  51. return &Reader{r: r, curr: &regFileReader{r, 0}}
  52. }
  53. // Next advances to the next entry in the tar archive.
  54. // The Header.Size determines how many bytes can be read for the next file.
  55. // Any remaining data in the current file is automatically discarded.
  56. //
  57. // io.EOF is returned at the end of the input.
  58. func (tr *Reader) Next() (*Header, error) {
  59. if tr.err != nil {
  60. return nil, tr.err
  61. }
  62. hdr, err := tr.next()
  63. tr.err = err
  64. return hdr, err
  65. }
  66. func (tr *Reader) next() (*Header, error) {
  67. var paxHdrs map[string]string
  68. var gnuLongName, gnuLongLink string
  69. if tr.RawAccounting {
  70. if tr.rawBytes == nil {
  71. tr.rawBytes = bytes.NewBuffer(nil)
  72. } else {
  73. tr.rawBytes.Reset()
  74. }
  75. }
  76. // Externally, Next iterates through the tar archive as if it is a series of
  77. // files. Internally, the tar format often uses fake "files" to add meta
  78. // data that describes the next file. These meta data "files" should not
  79. // normally be visible to the outside. As such, this loop iterates through
  80. // one or more "header files" until it finds a "normal file".
  81. format := FormatUSTAR | FormatPAX | FormatGNU
  82. for {
  83. // Discard the remainder of the file and any padding.
  84. if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil {
  85. return nil, err
  86. }
  87. n, err := tryReadFull(tr.r, tr.blk[:tr.pad])
  88. if err != nil {
  89. return nil, err
  90. }
  91. if tr.RawAccounting {
  92. tr.rawBytes.Write(tr.blk[:n])
  93. }
  94. tr.pad = 0
  95. hdr, rawHdr, err := tr.readHeader()
  96. if err != nil {
  97. return nil, err
  98. }
  99. if err := tr.handleRegularFile(hdr); err != nil {
  100. return nil, err
  101. }
  102. format.mayOnlyBe(hdr.Format)
  103. // Check for PAX/GNU special headers and files.
  104. switch hdr.Typeflag {
  105. case TypeXHeader, TypeXGlobalHeader:
  106. format.mayOnlyBe(FormatPAX)
  107. paxHdrs, err = parsePAX(tr)
  108. if err != nil {
  109. return nil, err
  110. }
  111. if hdr.Typeflag == TypeXGlobalHeader {
  112. if err = mergePAX(hdr, paxHdrs); err != nil {
  113. return nil, err
  114. }
  115. return &Header{
  116. Name: hdr.Name,
  117. Typeflag: hdr.Typeflag,
  118. Xattrs: hdr.Xattrs,
  119. PAXRecords: hdr.PAXRecords,
  120. Format: format,
  121. }, nil
  122. }
  123. continue // This is a meta header affecting the next header
  124. case TypeGNULongName, TypeGNULongLink:
  125. format.mayOnlyBe(FormatGNU)
  126. realname, err := ioutil.ReadAll(tr)
  127. if err != nil {
  128. return nil, err
  129. }
  130. if tr.RawAccounting {
  131. tr.rawBytes.Write(realname)
  132. }
  133. var p parser
  134. switch hdr.Typeflag {
  135. case TypeGNULongName:
  136. gnuLongName = p.parseString(realname)
  137. case TypeGNULongLink:
  138. gnuLongLink = p.parseString(realname)
  139. }
  140. continue // This is a meta header affecting the next header
  141. default:
  142. // The old GNU sparse format is handled here since it is technically
  143. // just a regular file with additional attributes.
  144. if err := mergePAX(hdr, paxHdrs); err != nil {
  145. return nil, err
  146. }
  147. if gnuLongName != "" {
  148. hdr.Name = gnuLongName
  149. }
  150. if gnuLongLink != "" {
  151. hdr.Linkname = gnuLongLink
  152. }
  153. if hdr.Typeflag == TypeRegA {
  154. if strings.HasSuffix(hdr.Name, "/") {
  155. hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories
  156. } else {
  157. hdr.Typeflag = TypeReg
  158. }
  159. }
  160. // The extended headers may have updated the size.
  161. // Thus, setup the regFileReader again after merging PAX headers.
  162. if err := tr.handleRegularFile(hdr); err != nil {
  163. return nil, err
  164. }
  165. // Sparse formats rely on being able to read from the logical data
  166. // section; there must be a preceding call to handleRegularFile.
  167. if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
  168. return nil, err
  169. }
  170. // Set the final guess at the format.
  171. if format.has(FormatUSTAR) && format.has(FormatPAX) {
  172. format.mayOnlyBe(FormatUSTAR)
  173. }
  174. hdr.Format = format
  175. return hdr, nil // This is a file, so stop
  176. }
  177. }
  178. }
  179. // handleRegularFile sets up the current file reader and padding such that it
  180. // can only read the following logical data section. It will properly handle
  181. // special headers that contain no data section.
  182. func (tr *Reader) handleRegularFile(hdr *Header) error {
  183. nb := hdr.Size
  184. if isHeaderOnlyType(hdr.Typeflag) {
  185. nb = 0
  186. }
  187. if nb < 0 {
  188. return ErrHeader
  189. }
  190. tr.pad = blockPadding(nb)
  191. tr.curr = &regFileReader{r: tr.r, nb: nb}
  192. return nil
  193. }
  194. // handleSparseFile checks if the current file is a sparse format of any type
  195. // and sets the curr reader appropriately.
  196. func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
  197. var spd sparseDatas
  198. var err error
  199. if hdr.Typeflag == TypeGNUSparse {
  200. spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
  201. } else {
  202. spd, err = tr.readGNUSparsePAXHeaders(hdr)
  203. }
  204. // If sp is non-nil, then this is a sparse file.
  205. // Note that it is possible for len(sp) == 0.
  206. if err == nil && spd != nil {
  207. if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
  208. return ErrHeader
  209. }
  210. sph := invertSparseEntries(spd, hdr.Size)
  211. tr.curr = &sparseFileReader{tr.curr, sph, 0}
  212. }
  213. return err
  214. }
  215. // readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers.
  216. // If they are found, then this function reads the sparse map and returns it.
  217. // This assumes that 0.0 headers have already been converted to 0.1 headers
  218. // by the PAX header parsing logic.
  219. func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
  220. // Identify the version of GNU headers.
  221. var is1x0 bool
  222. major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
  223. switch {
  224. case major == "0" && (minor == "0" || minor == "1"):
  225. is1x0 = false
  226. case major == "1" && minor == "0":
  227. is1x0 = true
  228. case major != "" || minor != "":
  229. return nil, nil // Unknown GNU sparse PAX version
  230. case hdr.PAXRecords[paxGNUSparseMap] != "":
  231. is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess
  232. default:
  233. return nil, nil // Not a PAX format GNU sparse file.
  234. }
  235. hdr.Format.mayOnlyBe(FormatPAX)
  236. // Update hdr from GNU sparse PAX headers.
  237. if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
  238. hdr.Name = name
  239. }
  240. size := hdr.PAXRecords[paxGNUSparseSize]
  241. if size == "" {
  242. size = hdr.PAXRecords[paxGNUSparseRealSize]
  243. }
  244. if size != "" {
  245. n, err := strconv.ParseInt(size, 10, 64)
  246. if err != nil {
  247. return nil, ErrHeader
  248. }
  249. hdr.Size = n
  250. }
  251. // Read the sparse map according to the appropriate format.
  252. if is1x0 {
  253. return readGNUSparseMap1x0(tr.curr)
  254. }
  255. return readGNUSparseMap0x1(hdr.PAXRecords)
  256. }
  257. // mergePAX merges paxHdrs into hdr for all relevant fields of Header.
  258. func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
  259. for k, v := range paxHdrs {
  260. if v == "" {
  261. continue // Keep the original USTAR value
  262. }
  263. var id64 int64
  264. switch k {
  265. case paxPath:
  266. hdr.Name = v
  267. case paxLinkpath:
  268. hdr.Linkname = v
  269. case paxUname:
  270. hdr.Uname = v
  271. case paxGname:
  272. hdr.Gname = v
  273. case paxUid:
  274. id64, err = strconv.ParseInt(v, 10, 64)
  275. hdr.Uid = int(id64) // Integer overflow possible
  276. case paxGid:
  277. id64, err = strconv.ParseInt(v, 10, 64)
  278. hdr.Gid = int(id64) // Integer overflow possible
  279. case paxAtime:
  280. hdr.AccessTime, err = parsePAXTime(v)
  281. case paxMtime:
  282. hdr.ModTime, err = parsePAXTime(v)
  283. case paxCtime:
  284. hdr.ChangeTime, err = parsePAXTime(v)
  285. case paxSize:
  286. hdr.Size, err = strconv.ParseInt(v, 10, 64)
  287. default:
  288. if strings.HasPrefix(k, paxSchilyXattr) {
  289. if hdr.Xattrs == nil {
  290. hdr.Xattrs = make(map[string]string)
  291. }
  292. hdr.Xattrs[k[len(paxSchilyXattr):]] = v
  293. }
  294. }
  295. if err != nil {
  296. return ErrHeader
  297. }
  298. }
  299. hdr.PAXRecords = paxHdrs
  300. return nil
  301. }
  302. // parsePAX parses PAX headers.
  303. // If an extended header (type 'x') is invalid, ErrHeader is returned
  304. func parsePAX(r io.Reader) (map[string]string, error) {
  305. buf, err := ioutil.ReadAll(r)
  306. if err != nil {
  307. return nil, err
  308. }
  309. // leaving this function for io.Reader makes it more testable
  310. if tr, ok := r.(*Reader); ok && tr.RawAccounting {
  311. if _, err = tr.rawBytes.Write(buf); err != nil {
  312. return nil, err
  313. }
  314. }
  315. sbuf := string(buf)
  316. // For GNU PAX sparse format 0.0 support.
  317. // This function transforms the sparse format 0.0 headers into format 0.1
  318. // headers since 0.0 headers were not PAX compliant.
  319. var sparseMap []string
  320. paxHdrs := make(map[string]string)
  321. for len(sbuf) > 0 {
  322. key, value, residual, err := parsePAXRecord(sbuf)
  323. if err != nil {
  324. return nil, ErrHeader
  325. }
  326. sbuf = residual
  327. switch key {
  328. case paxGNUSparseOffset, paxGNUSparseNumBytes:
  329. // Validate sparse header order and value.
  330. if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
  331. (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
  332. strings.Contains(value, ",") {
  333. return nil, ErrHeader
  334. }
  335. sparseMap = append(sparseMap, value)
  336. default:
  337. paxHdrs[key] = value
  338. }
  339. }
  340. if len(sparseMap) > 0 {
  341. paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
  342. }
  343. return paxHdrs, nil
  344. }
  345. // readHeader reads the next block header and assumes that the underlying reader
  346. // is already aligned to a block boundary. It returns the raw block of the
  347. // header in case further processing is required.
  348. //
  349. // The err will be set to io.EOF only when one of the following occurs:
  350. // - Exactly 0 bytes are read and EOF is hit.
  351. // - Exactly 1 block of zeros is read and EOF is hit.
  352. // - At least 2 blocks of zeros are read.
  353. func (tr *Reader) readHeader() (*Header, *block, error) {
  354. // Two blocks of zero bytes marks the end of the archive.
  355. n, err := io.ReadFull(tr.r, tr.blk[:])
  356. if tr.RawAccounting && (err == nil || err == io.EOF) {
  357. tr.rawBytes.Write(tr.blk[:n])
  358. }
  359. if err != nil {
  360. return nil, nil, err // EOF is okay here; exactly 0 bytes read
  361. }
  362. if bytes.Equal(tr.blk[:], zeroBlock[:]) {
  363. n, err = io.ReadFull(tr.r, tr.blk[:])
  364. if tr.RawAccounting && (err == nil || err == io.EOF) {
  365. tr.rawBytes.Write(tr.blk[:n])
  366. }
  367. if err != nil {
  368. return nil, nil, err // EOF is okay here; exactly 1 block of zeros read
  369. }
  370. if bytes.Equal(tr.blk[:], zeroBlock[:]) {
  371. return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read
  372. }
  373. return nil, nil, ErrHeader // Zero block and then non-zero block
  374. }
  375. // Verify the header matches a known format.
  376. format := tr.blk.GetFormat()
  377. if format == FormatUnknown {
  378. return nil, nil, ErrHeader
  379. }
  380. var p parser
  381. hdr := new(Header)
  382. // Unpack the V7 header.
  383. v7 := tr.blk.V7()
  384. hdr.Typeflag = v7.TypeFlag()[0]
  385. hdr.Name = p.parseString(v7.Name())
  386. hdr.Linkname = p.parseString(v7.LinkName())
  387. hdr.Size = p.parseNumeric(v7.Size())
  388. hdr.Mode = p.parseNumeric(v7.Mode())
  389. hdr.Uid = int(p.parseNumeric(v7.UID()))
  390. hdr.Gid = int(p.parseNumeric(v7.GID()))
  391. hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)
  392. // Unpack format specific fields.
  393. if format > formatV7 {
  394. ustar := tr.blk.USTAR()
  395. hdr.Uname = p.parseString(ustar.UserName())
  396. hdr.Gname = p.parseString(ustar.GroupName())
  397. hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
  398. hdr.Devminor = p.parseNumeric(ustar.DevMinor())
  399. var prefix string
  400. switch {
  401. case format.has(FormatUSTAR | FormatPAX):
  402. hdr.Format = format
  403. ustar := tr.blk.USTAR()
  404. prefix = p.parseString(ustar.Prefix())
  405. // For Format detection, check if block is properly formatted since
  406. // the parser is more liberal than what USTAR actually permits.
  407. notASCII := func(r rune) bool { return r >= 0x80 }
  408. if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
  409. hdr.Format = FormatUnknown // Non-ASCII characters in block.
  410. }
  411. nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
  412. if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
  413. nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
  414. hdr.Format = FormatUnknown // Numeric fields must end in NUL
  415. }
  416. case format.has(formatSTAR):
  417. star := tr.blk.STAR()
  418. prefix = p.parseString(star.Prefix())
  419. hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
  420. hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
  421. case format.has(FormatGNU):
  422. hdr.Format = format
  423. var p2 parser
  424. gnu := tr.blk.GNU()
  425. if b := gnu.AccessTime(); b[0] != 0 {
  426. hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
  427. }
  428. if b := gnu.ChangeTime(); b[0] != 0 {
  429. hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
  430. }
  431. // Prior to Go1.8, the Writer had a bug where it would output
  432. // an invalid tar file in certain rare situations because the logic
  433. // incorrectly believed that the old GNU format had a prefix field.
  434. // This is wrong and leads to an output file that mangles the
  435. // atime and ctime fields, which are often left unused.
  436. //
  437. // In order to continue reading tar files created by former, buggy
  438. // versions of Go, we skeptically parse the atime and ctime fields.
  439. // If we are unable to parse them and the prefix field looks like
  440. // an ASCII string, then we fallback on the pre-Go1.8 behavior
  441. // of treating these fields as the USTAR prefix field.
  442. //
  443. // Note that this will not use the fallback logic for all possible
  444. // files generated by a pre-Go1.8 toolchain. If the generated file
  445. // happened to have a prefix field that parses as valid
  446. // atime and ctime fields (e.g., when they are valid octal strings),
  447. // then it is impossible to distinguish between an valid GNU file
  448. // and an invalid pre-Go1.8 file.
  449. //
  450. // See https://golang.org/issues/12594
  451. // See https://golang.org/issues/21005
  452. if p2.err != nil {
  453. hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
  454. ustar := tr.blk.USTAR()
  455. if s := p.parseString(ustar.Prefix()); isASCII(s) {
  456. prefix = s
  457. }
  458. hdr.Format = FormatUnknown // Buggy file is not GNU
  459. }
  460. }
  461. if len(prefix) > 0 {
  462. hdr.Name = prefix + "/" + hdr.Name
  463. }
  464. }
  465. return hdr, &tr.blk, p.err
  466. }
  467. // readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
  468. // The sparse map is stored in the tar header if it's small enough.
  469. // If it's larger than four entries, then one or more extension headers are used
  470. // to store the rest of the sparse map.
  471. //
  472. // The Header.Size does not reflect the size of any extended headers used.
  473. // Thus, this function will read from the raw io.Reader to fetch extra headers.
  474. // This method mutates blk in the process.
  475. func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
  476. // Make sure that the input format is GNU.
  477. // Unfortunately, the STAR format also has a sparse header format that uses
  478. // the same type flag but has a completely different layout.
  479. if blk.GetFormat() != FormatGNU {
  480. return nil, ErrHeader
  481. }
  482. hdr.Format.mayOnlyBe(FormatGNU)
  483. var p parser
  484. hdr.Size = p.parseNumeric(blk.GNU().RealSize())
  485. if p.err != nil {
  486. return nil, p.err
  487. }
  488. s := blk.GNU().Sparse()
  489. spd := make(sparseDatas, 0, s.MaxEntries())
  490. for {
  491. for i := 0; i < s.MaxEntries(); i++ {
  492. // This termination condition is identical to GNU and BSD tar.
  493. if s.Entry(i).Offset()[0] == 0x00 {
  494. break // Don't return, need to process extended headers (even if empty)
  495. }
  496. offset := p.parseNumeric(s.Entry(i).Offset())
  497. length := p.parseNumeric(s.Entry(i).Length())
  498. if p.err != nil {
  499. return nil, p.err
  500. }
  501. spd = append(spd, sparseEntry{Offset: offset, Length: length})
  502. }
  503. if s.IsExtended()[0] > 0 {
  504. // There are more entries. Read an extension header and parse its entries.
  505. if _, err := mustReadFull(tr.r, blk[:]); err != nil {
  506. return nil, err
  507. }
  508. if tr.RawAccounting {
  509. tr.rawBytes.Write(blk[:])
  510. }
  511. s = blk.Sparse()
  512. continue
  513. }
  514. return spd, nil // Done
  515. }
  516. }
  517. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
  518. // version 1.0. The format of the sparse map consists of a series of
  519. // newline-terminated numeric fields. The first field is the number of entries
  520. // and is always present. Following this are the entries, consisting of two
  521. // fields (offset, length). This function must stop reading at the end
  522. // boundary of the block containing the last newline.
  523. //
  524. // Note that the GNU manual says that numeric values should be encoded in octal
  525. // format. However, the GNU tar utility itself outputs these values in decimal.
  526. // As such, this library treats values as being encoded in decimal.
  527. func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
  528. var (
  529. cntNewline int64
  530. buf bytes.Buffer
  531. blk block
  532. )
  533. // feedTokens copies data in blocks from r into buf until there are
  534. // at least cnt newlines in buf. It will not read more blocks than needed.
  535. feedTokens := func(n int64) error {
  536. for cntNewline < n {
  537. if _, err := mustReadFull(r, blk[:]); err != nil {
  538. return err
  539. }
  540. buf.Write(blk[:])
  541. for _, c := range blk {
  542. if c == '\n' {
  543. cntNewline++
  544. }
  545. }
  546. }
  547. return nil
  548. }
  549. // nextToken gets the next token delimited by a newline. This assumes that
  550. // at least one newline exists in the buffer.
  551. nextToken := func() string {
  552. cntNewline--
  553. tok, _ := buf.ReadString('\n')
  554. return strings.TrimRight(tok, "\n")
  555. }
  556. // Parse for the number of entries.
  557. // Use integer overflow resistant math to check this.
  558. if err := feedTokens(1); err != nil {
  559. return nil, err
  560. }
  561. numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
  562. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  563. return nil, ErrHeader
  564. }
  565. // Parse for all member entries.
  566. // numEntries is trusted after this since a potential attacker must have
  567. // committed resources proportional to what this library used.
  568. if err := feedTokens(2 * numEntries); err != nil {
  569. return nil, err
  570. }
  571. spd := make(sparseDatas, 0, numEntries)
  572. for i := int64(0); i < numEntries; i++ {
  573. offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
  574. length, err2 := strconv.ParseInt(nextToken(), 10, 64)
  575. if err1 != nil || err2 != nil {
  576. return nil, ErrHeader
  577. }
  578. spd = append(spd, sparseEntry{Offset: offset, Length: length})
  579. }
  580. return spd, nil
  581. }
  582. // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
  583. // version 0.1. The sparse map is stored in the PAX headers.
  584. func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
  585. // Get number of entries.
  586. // Use integer overflow resistant math to check this.
  587. numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
  588. numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
  589. if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
  590. return nil, ErrHeader
  591. }
  592. // There should be two numbers in sparseMap for each entry.
  593. sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
  594. if len(sparseMap) == 1 && sparseMap[0] == "" {
  595. sparseMap = sparseMap[:0]
  596. }
  597. if int64(len(sparseMap)) != 2*numEntries {
  598. return nil, ErrHeader
  599. }
  600. // Loop through the entries in the sparse map.
  601. // numEntries is trusted now.
  602. spd := make(sparseDatas, 0, numEntries)
  603. for len(sparseMap) >= 2 {
  604. offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
  605. length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
  606. if err1 != nil || err2 != nil {
  607. return nil, ErrHeader
  608. }
  609. spd = append(spd, sparseEntry{Offset: offset, Length: length})
  610. sparseMap = sparseMap[2:]
  611. }
  612. return spd, nil
  613. }
  614. // Read reads from the current file in the tar archive.
  615. // It returns (0, io.EOF) when it reaches the end of that file,
  616. // until Next is called to advance to the next file.
  617. //
  618. // If the current file is sparse, then the regions marked as a hole
  619. // are read back as NUL-bytes.
  620. //
  621. // Calling Read on special types like TypeLink, TypeSymlink, TypeChar,
  622. // TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what
  623. // the Header.Size claims.
  624. func (tr *Reader) Read(b []byte) (int, error) {
  625. if tr.err != nil {
  626. return 0, tr.err
  627. }
  628. n, err := tr.curr.Read(b)
  629. if err != nil && err != io.EOF {
  630. tr.err = err
  631. }
  632. return n, err
  633. }
  634. // writeTo writes the content of the current file to w.
  635. // The bytes written matches the number of remaining bytes in the current file.
  636. //
  637. // If the current file is sparse and w is an io.WriteSeeker,
  638. // then writeTo uses Seek to skip past holes defined in Header.SparseHoles,
  639. // assuming that skipped regions are filled with NULs.
  640. // This always writes the last byte to ensure w is the right size.
  641. //
  642. // TODO(dsnet): Re-export this when adding sparse file support.
  643. // See https://golang.org/issue/22735
  644. func (tr *Reader) writeTo(w io.Writer) (int64, error) {
  645. if tr.err != nil {
  646. return 0, tr.err
  647. }
  648. n, err := tr.curr.WriteTo(w)
  649. if err != nil {
  650. tr.err = err
  651. }
  652. return n, err
  653. }
  // regFileReader is a fileReader for reading data from a regular file entry.
  type regFileReader struct {
  	r  io.Reader // Underlying Reader
  	nb int64     // Number of remaining bytes to read
  }

  // Read reads at most nb bytes from the underlying reader into b.
  //
  // It returns io.EOF once all nb bytes have been delivered (possibly together
  // with the final bytes), and io.ErrUnexpectedEOF when the underlying reader
  // hits EOF while bytes are still owed.
  func (fr *regFileReader) Read(b []byte) (n int, err error) {
  	if int64(len(b)) > fr.nb {
  		b = b[:fr.nb] // never read past the end of this entry
  	}
  	if len(b) > 0 {
  		n, err = fr.r.Read(b)
  		fr.nb -= int64(n)
  	}
  	switch {
  	case err == io.EOF && fr.nb > 0:
  		return n, io.ErrUnexpectedEOF
  	case err == nil && fr.nb == 0:
  		return n, io.EOF
  	default:
  		return n, err
  	}
  }

  // WriteTo copies the remaining bytes of the entry to w.
  //
  // The receiver is wrapped in a struct exposing only io.Reader, so io.Copy
  // sees no WriteTo method on its source and drives the copy through Read
  // rather than calling back into this method.
  func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
  	return io.Copy(w, struct{ io.Reader }{fr})
  }

  // LogicalRemaining reports the number of bytes still to be read. For a
  // regular file this is the same count as PhysicalRemaining.
  func (fr regFileReader) LogicalRemaining() int64 {
  	return fr.nb
  }

  // PhysicalRemaining reports the number of bytes of this entry not yet
  // consumed from the underlying reader.
  func (fr regFileReader) PhysicalRemaining() int64 {
  	return fr.nb
  }
  685. // sparseFileReader is a fileReader for reading data from a sparse file entry.
  686. type sparseFileReader struct {
  687. fr fileReader // Underlying fileReader
  688. sp sparseHoles // Normalized list of sparse holes
  689. pos int64 // Current position in sparse file
  690. }
  691. func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
  692. finished := int64(len(b)) >= sr.LogicalRemaining()
  693. if finished {
  694. b = b[:sr.LogicalRemaining()]
  695. }
  696. b0 := b
  697. endPos := sr.pos + int64(len(b))
  698. for endPos > sr.pos && err == nil {
  699. var nf int // Bytes read in fragment
  700. holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
  701. if sr.pos < holeStart { // In a data fragment
  702. bf := b[:min(int64(len(b)), holeStart-sr.pos)]
  703. nf, err = tryReadFull(sr.fr, bf)
  704. } else { // In a hole fragment
  705. bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
  706. nf, err = tryReadFull(zeroReader{}, bf)
  707. }
  708. b = b[nf:]
  709. sr.pos += int64(nf)
  710. if sr.pos >= holeEnd && len(sr.sp) > 1 {
  711. sr.sp = sr.sp[1:] // Ensure last fragment always remains
  712. }
  713. }
  714. n = len(b0) - len(b)
  715. switch {
  716. case err == io.EOF:
  717. return n, errMissData // Less data in dense file than sparse file
  718. case err != nil:
  719. return n, err
  720. case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
  721. return n, errUnrefData // More data in dense file than sparse file
  722. case finished:
  723. return n, io.EOF
  724. default:
  725. return n, nil
  726. }
  727. }
  728. func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
  729. ws, ok := w.(io.WriteSeeker)
  730. if ok {
  731. if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
  732. ok = false // Not all io.Seeker can really seek
  733. }
  734. }
  735. if !ok {
  736. return io.Copy(w, struct{ io.Reader }{sr})
  737. }
  738. var writeLastByte bool
  739. pos0 := sr.pos
  740. for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
  741. var nf int64 // Size of fragment
  742. holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
  743. if sr.pos < holeStart { // In a data fragment
  744. nf = holeStart - sr.pos
  745. nf, err = io.CopyN(ws, sr.fr, nf)
  746. } else { // In a hole fragment
  747. nf = holeEnd - sr.pos
  748. if sr.PhysicalRemaining() == 0 {
  749. writeLastByte = true
  750. nf--
  751. }
  752. _, err = ws.Seek(nf, io.SeekCurrent)
  753. }
  754. sr.pos += nf
  755. if sr.pos >= holeEnd && len(sr.sp) > 1 {
  756. sr.sp = sr.sp[1:] // Ensure last fragment always remains
  757. }
  758. }
  759. // If the last fragment is a hole, then seek to 1-byte before EOF, and
  760. // write a single byte to ensure the file is the right size.
  761. if writeLastByte && err == nil {
  762. _, err = ws.Write([]byte{0})
  763. sr.pos++
  764. }
  765. n = sr.pos - pos0
  766. switch {
  767. case err == io.EOF:
  768. return n, errMissData // Less data in dense file than sparse file
  769. case err != nil:
  770. return n, err
  771. case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
  772. return n, errUnrefData // More data in dense file than sparse file
  773. default:
  774. return n, nil
  775. }
  776. }
  777. func (sr sparseFileReader) LogicalRemaining() int64 {
  778. return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
  779. }
  780. func (sr sparseFileReader) PhysicalRemaining() int64 {
  781. return sr.fr.PhysicalRemaining()
  782. }
  // zeroReader is a reader that produces an endless stream of NUL bytes.
  type zeroReader struct{}

  // Read overwrites every byte of b with zero and reports len(b) bytes read.
  // It never returns an error.
  func (zeroReader) Read(b []byte) (int, error) {
  	for i := range b {
  		b[i] = 0
  	}
  	return len(b), nil
  }
  790. // mustReadFull is like io.ReadFull except it returns
  791. // io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read.
  792. func mustReadFull(r io.Reader, b []byte) (int, error) {
  793. n, err := tryReadFull(r, b)
  794. if err == io.EOF {
  795. err = io.ErrUnexpectedEOF
  796. }
  797. return n, err
  798. }
  // tryReadFull is like io.ReadFull except it returns
  // io.EOF when it is hit before len(b) bytes are read.
  //
  // If b is filled completely, an io.EOF delivered together with the final
  // bytes is suppressed and nil is returned instead.
  func tryReadFull(r io.Reader, b []byte) (n int, err error) {
  	for len(b) > n && err == nil {
  		var nn int
  		nn, err = r.Read(b[n:])
  		n += nn
  	}
  	if len(b) == n && err == io.EOF {
  		err = nil
  	}
  	return n, err
  }
  812. // discard skips n bytes in r, reporting an error if unable to do so.
  813. func discard(tr *Reader, n int64) error {
  814. var seekSkipped, copySkipped int64
  815. var err error
  816. r := tr.r
  817. if tr.RawAccounting {
  818. copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n)
  819. goto out
  820. }
  821. // If possible, Seek to the last byte before the end of the data section.
  822. // Do this because Seek is often lazy about reporting errors; this will mask
  823. // the fact that the stream may be truncated. We can rely on the
  824. // io.CopyN done shortly afterwards to trigger any IO errors.
  825. if sr, ok := r.(io.Seeker); ok && n > 1 {
  826. // Not all io.Seeker can actually Seek. For example, os.Stdin implements
  827. // io.Seeker, but calling Seek always returns an error and performs
  828. // no action. Thus, we try an innocent seek to the current position
  829. // to see if Seek is really supported.
  830. pos1, err := sr.Seek(0, io.SeekCurrent)
  831. if pos1 >= 0 && err == nil {
  832. // Seek seems supported, so perform the real Seek.
  833. pos2, err := sr.Seek(n-1, io.SeekCurrent)
  834. if pos2 < 0 || err != nil {
  835. return err
  836. }
  837. seekSkipped = pos2 - pos1
  838. }
  839. }
  840. copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped)
  841. out:
  842. if err == io.EOF && seekSkipped+copySkipped < n {
  843. err = io.ErrUnexpectedEOF
  844. }
  845. return err
  846. }