diff --git a/graph/graph.go b/graph/graph.go index 52069e9556..be911b0482 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -28,6 +28,8 @@ import ( "github.com/docker/docker/pkg/system" "github.com/docker/docker/pkg/truncindex" "github.com/docker/docker/runconfig" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" ) // The type is used to protect pulling or building related image @@ -82,6 +84,14 @@ type Graph struct { retained *retainedLayers } +// file names for ./graph// +const ( + jsonFileName = "json" + layersizeFileName = "layersize" + digestFileName = "checksum" + tarDataFileName = "tar-data.json.gz" +) + var ( // ErrDigestNotSet is used when request the digest for a layer // but the layer has no digest value or content to compute the @@ -456,7 +466,7 @@ func (graph *Graph) loadImage(id string) (*image.Image, error) { return nil, err } - if buf, err := ioutil.ReadFile(filepath.Join(root, "layersize")); err != nil { + if buf, err := ioutil.ReadFile(filepath.Join(root, layersizeFileName)); err != nil { if !os.IsNotExist(err) { return nil, err } @@ -479,8 +489,8 @@ func (graph *Graph) loadImage(id string) (*image.Image, error) { // saveSize stores the `size` in the provided graph `img` directory `root`. func (graph *Graph) saveSize(root string, size int) error { - if err := ioutil.WriteFile(filepath.Join(root, "layersize"), []byte(strconv.Itoa(size)), 0600); err != nil { - return fmt.Errorf("Error storing image size in %s/layersize: %s", root, err) + if err := ioutil.WriteFile(filepath.Join(root, layersizeFileName), []byte(strconv.Itoa(size)), 0600); err != nil { + return fmt.Errorf("Error storing image size in %s/%s: %s", root, layersizeFileName, err) } return nil } @@ -488,8 +498,8 @@ func (graph *Graph) saveSize(root string, size int) error { // SetDigest sets the digest for the image layer to the provided value. func (graph *Graph) SetDigest(id string, dgst digest.Digest) error { root := graph.imageRoot(id) - if err := ioutil.WriteFile(filepath.Join(root, "checksum"), []byte(dgst.String()), 0600); err != nil { - return fmt.Errorf("Error storing digest in %s/checksum: %s", root, err) + if err := ioutil.WriteFile(filepath.Join(root, digestFileName), []byte(dgst.String()), 0600); err != nil { + return fmt.Errorf("Error storing digest in %s/%s: %s", root, digestFileName, err) } return nil } @@ -497,7 +507,7 @@ func (graph *Graph) SetDigest(id string, dgst digest.Digest) error { // GetDigest gets the digest for the provide image layer id. func (graph *Graph) GetDigest(id string) (digest.Digest, error) { root := graph.imageRoot(id) - cs, err := ioutil.ReadFile(filepath.Join(root, "checksum")) + cs, err := ioutil.ReadFile(filepath.Join(root, digestFileName)) if err != nil { if os.IsNotExist(err) { return "", ErrDigestNotSet @@ -520,5 +530,82 @@ func (graph *Graph) RawJSON(id string) ([]byte, error) { } func jsonPath(root string) string { - return filepath.Join(root, "json") + return filepath.Join(root, jsonFileName) +} + +func (graph *Graph) disassembleAndApplyTarLayer(img *image.Image, layerData archive.ArchiveReader, root string) error { + // this is saving the tar-split metadata + mf, err := os.OpenFile(filepath.Join(root, tarDataFileName), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + return err + } + mfz := gzip.NewWriter(mf) + metaPacker := storage.NewJSONPacker(mfz) + defer mf.Close() + defer mfz.Close() + + inflatedLayerData, err := archive.DecompressStream(layerData) + if err != nil { + return err + } + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + rdr, err := asm.NewInputTarStream(inflatedLayerData, metaPacker, nil) + if err != nil { + return err + } + + if img.Size, err = graph.driver.ApplyDiff(img.ID, img.Parent, archive.ArchiveReader(rdr)); err != nil { + return err + } + + return nil +} + +func (graph *Graph) assembleTarLayer(img *image.Image) (archive.Archive, error) { + root := graph.imageRoot(img.ID) + mFileName := filepath.Join(root, tarDataFileName) + mf, err := os.Open(mFileName) + if err != nil { + if !os.IsNotExist(err) { + logrus.Errorf("failed to open %q: %s", mFileName, err) + } + return nil, err + } + pR, pW := io.Pipe() + // this will need to be in a goroutine, as we are returning the stream of a + // tar archive, but can not close the metadata reader early (when this + // function returns)... + go func() { + defer mf.Close() + // let's reassemble! + logrus.Debugf("[graph] TarLayer with reassembly: %s", img.ID) + mfz, err := gzip.NewReader(mf) + if err != nil { + pW.CloseWithError(fmt.Errorf("[graph] error with %s: %s", mFileName, err)) + return + } + defer mfz.Close() + + // get our relative path to the container + fsLayer, err := graph.driver.Get(img.ID, "") + if err != nil { + pW.CloseWithError(err) + return + } + defer graph.driver.Put(img.ID) + + metaUnpacker := storage.NewJSONUnpacker(mfz) + fileGetter := storage.NewPathFileGetter(fsLayer) + logrus.Debugf("[graph] %s is at %q", img.ID, fsLayer) + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + if _, err := io.Copy(pW, ots); err != nil { + pW.CloseWithError(err) + return + } + pW.Close() + }() + return pR, nil } diff --git a/graph/graph_unix.go b/graph/graph_unix.go index e42eb9bb36..351a84ccad 100644 --- a/graph/graph_unix.go +++ b/graph/graph_unix.go @@ -10,6 +10,7 @@ import ( "strings" "syscall" + "github.com/Sirupsen/logrus" "github.com/docker/docker/image" "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/system" @@ -90,7 +91,7 @@ func (graph *Graph) restoreBaseImages() ([]string, error) { func (graph *Graph) storeImage(img *image.Image, layerData archive.ArchiveReader, root string) (err error) { // Store the layer. If layerData is not nil, unpack it into the new layer if layerData != nil { - if img.Size, err = graph.driver.ApplyDiff(img.ID, img.Parent, layerData); err != nil { + if err := graph.disassembleAndApplyTarLayer(img, layerData, root); err != nil { return err } } @@ -111,5 +112,10 @@ func (graph *Graph) storeImage(img *image.Image, layerData archive.ArchiveReader // TarLayer returns a tar archive of the image's filesystem layer. func (graph *Graph) TarLayer(img *image.Image) (arch archive.Archive, err error) { - return graph.driver.Diff(img.ID, img.Parent) + rdr, err := graph.assembleTarLayer(img) + if err != nil { + logrus.Debugf("[graph] TarLayer with traditional differ: %s", img.ID) + return graph.driver.Diff(img.ID, img.Parent) + } + return rdr, nil } diff --git a/graph/graph_windows.go b/graph/graph_windows.go index 2904f9b9f8..1422705a75 100644 --- a/graph/graph_windows.go +++ b/graph/graph_windows.go @@ -115,7 +115,7 @@ func (graph *Graph) storeImage(img *image.Image, layerData archive.ArchiveReader // Store the layer. If layerData is not nil, unpack it into the new layer if layerData != nil { - if img.Size, err = graph.driver.ApplyDiff(img.ID, img.Parent, layerData); err != nil { + if err := graph.disassembleAndApplyTarLayer(img, layerData, root); err != nil { return err } } @@ -156,6 +156,11 @@ func (graph *Graph) TarLayer(img *image.Image) (arch archive.Archive, err error) // We keep this functionality here so that we can still work with the VFS // driver during development. VFS is not supported (and just will not work) // for Windows containers. - return graph.driver.Diff(img.ID, img.Parent) + rdr, err := graph.assembleTarLayer(img) + if err != nil { + logrus.Debugf("[graph] TarLayer with traditional differ: %s", img.ID) + return graph.driver.Diff(img.ID, img.Parent) + } + return rdr, nil } } diff --git a/hack/vendor.sh b/hack/vendor.sh index cafcf07a12..26934c5987 100755 --- a/hack/vendor.sh +++ b/hack/vendor.sh @@ -34,8 +34,9 @@ clone git github.com/samuel/go-zookeeper d0e0d8e11f318e000a8cc434616d69e329edc37 clone git github.com/coreos/go-etcd v2.0.0 clone git github.com/hashicorp/consul v0.5.2 -# get distribution packages +# get graph and distribution packages clone git github.com/docker/distribution 419bbc2da637d9b2a812be78ef8436df7caac70d +clone git github.com/vbatts/tar-split v0.9.4 clone git github.com/opencontainers/runc v0.0.2 # libcontainer # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) diff --git a/vendor/src/github.com/vbatts/tar-split/.travis.yml b/vendor/src/github.com/vbatts/tar-split/.travis.yml new file mode 100644 index 0000000000..fc1571c979 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/.travis.yml @@ -0,0 +1,13 @@ +language: go +go: + - 1.4.2 + - 1.3.3 + +# let us have pretty, fast Docker-based Travis workers! +sudo: false + +# we don't need "go get" here <3 +install: go get -d ./... + +script: + - go test -v ./... diff --git a/vendor/src/github.com/vbatts/tar-split/DESIGN.md b/vendor/src/github.com/vbatts/tar-split/DESIGN.md new file mode 100644 index 0000000000..1ce3fd4ae4 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/DESIGN.md @@ -0,0 +1,36 @@ +Flow of TAR stream +================== + +The underlying use of `github.com/vbatts/tar-split/archive/tar` is most similar +to stdlib. + + +Packer interface +---------------- + +For ease of storage and usage of the raw bytes, there will be a storage +interface, that accepts an io.Writer (This way you could pass it an in memory +buffer or a file handle). + +Having a Packer interface can allow configuration of hash.Hash for file payloads +and providing your own io.Writer. + +Instead of having a state directory to store all the header information for all +Readers, we will leave that up to user of Reader. Because we can not assume an +ID for each Reader, and keeping that information differentiated. + + + +State Directory +--------------- + +Perhaps we could deduplicate the header info, by hashing the rawbytes and +storing them in a directory tree like: + + ./ac/dc/beef + +Then reference the hash of the header info, in the positional records for the +tar stream. Though this could be a future feature, and not required for an +initial implementation. Also, this would imply an owned state directory, rather +than just writing storage info to an io.Writer. + diff --git a/vendor/src/github.com/vbatts/tar-split/LICENSE b/vendor/src/github.com/vbatts/tar-split/LICENSE new file mode 100644 index 0000000000..8ba549196e --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Vincent Batts, Raleigh, NC, USA + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/src/github.com/vbatts/tar-split/README.md b/vendor/src/github.com/vbatts/tar-split/README.md new file mode 100644 index 0000000000..c5e9a71779 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/README.md @@ -0,0 +1,181 @@ +tar-split +======== + +[![Build Status](https://travis-ci.org/vbatts/tar-split.svg?branch=master)](https://travis-ci.org/vbatts/tar-split) + +Extend the upstream golang stdlib `archive/tar` library, to expose the raw +bytes of the TAR, rather than just the marshalled headers and file stream. + +The goal being that by preserving the raw bytes of each header, padding bytes, +and the raw file payload, one could reassemble the original archive. + + +Docs +---- + +* https://godoc.org/github.com/vbatts/tar-split/tar/asm +* https://godoc.org/github.com/vbatts/tar-split/tar/storage +* https://godoc.org/github.com/vbatts/tar-split/archive/tar + + +Caveat +------ + +Eventually this should detect TARs that this is not possible with. + +For example stored sparse files that have "holes" in them, will be read as a +contiguous file, though the archive contents may be recorded in sparse format. +Therefore when adding the file payload to a reassembled tar, to achieve +identical output, the file payload would need be precisely re-sparsified. This +is not something I seek to fix imediately, but would rather have an alert that +precise reassembly is not possible. +(see more http://www.gnu.org/software/tar/manual/html_node/Sparse-Formats.html) + + +Other caveat, while tar archives support having multiple file entries for the +same path, we will not support this feature. If there are more than one entries +with the same path, expect an err (like `ErrDuplicatePath`) or a resulting tar +stream that does not validate your original checksum/signature. + + +Contract +-------- + +Do not break the API of stdlib `archive/tar` in our fork (ideally find an +upstream mergeable solution) + + +Std Version +----------- + +The version of golang stdlib `archive/tar` is from go1.4.1, and their master branch around [a9dddb53f](https://github.com/golang/go/tree/a9dddb53f) + + +Example +------- + +First we'll get an archive to work with. For repeatability, we'll make an +archive from what you've just cloned: + +``` +git archive --format=tar -o tar-split.tar HEAD . +``` + +Then build the example main.go: + +``` +go build ./main.go +``` + +Now run the example over the archive: + +``` +$ ./main tar-split.tar +2015/02/20 15:00:58 writing "tar-split.tar" to "tar-split.tar.out" +pax_global_header pre: 512 read: 52 +.travis.yml pre: 972 read: 374 +DESIGN.md pre: 650 read: 1131 +LICENSE pre: 917 read: 1075 +README.md pre: 973 read: 4289 +archive/ pre: 831 read: 0 +archive/tar/ pre: 512 read: 0 +archive/tar/common.go pre: 512 read: 7790 +[...] +tar/storage/entry_test.go pre: 667 read: 1137 +tar/storage/getter.go pre: 911 read: 2741 +tar/storage/getter_test.go pre: 843 read: 1491 +tar/storage/packer.go pre: 557 read: 3141 +tar/storage/packer_test.go pre: 955 read: 3096 +EOF padding: 1512 +Remainder: 512 +Size: 215040; Sum: 215040 +``` + +*What are we seeing here?* + +* `pre` is the header of a file entry, and potentially the padding from the + end of the prior file's payload. Also with particular tar extensions and pax + attributes, the header can exceed 512 bytes. +* `read` is the size of the file payload from the entry +* `EOF padding` is the expected 1024 null bytes on the end of a tar archive, + plus potential padding from the end of the prior file entry's payload +* `Remainder` is the remaining bytes of an archive. This is typically deadspace + as most tar implmentations will return after having reached the end of the + 1024 null bytes. Though various implementations will include some amount of + bytes here, which will affect the checksum of the resulting tar archive, + therefore this must be accounted for as well. + +Ideally the input tar and output `*.out`, will match: + +``` +$ sha1sum tar-split.tar* +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar +ca9e19966b892d9ad5960414abac01ef585a1e22 tar-split.tar.out +``` + + +Stored Metadata +--------------- + +Since the raw bytes of the headers and padding are stored, you may be wondering +what the size implications are. The headers are at least 512 bytes per +file (sometimes more), at least 1024 null bytes on the end, and then various +padding. This makes for a constant linear growth in the stored metadata, with a +naive storage implementation. + +Reusing our prior example's `tar-split.tar`, let's build the checksize.go example: + +``` +go build ./checksize.go +``` + +``` +$ ./checksize ./tar-split.tar +inspecting "tar-split.tar" (size 210k) + -- number of files: 50 + -- size of metadata uncompressed: 53k + -- size of gzip compressed metadata: 3k +``` + +So assuming you've managed the extraction of the archive yourself, for reuse of +the file payloads from a relative path, then the only additional storage +implications are as little as 3kb. + +But let's look at a larger archive, with many files. + +``` +$ ls -sh ./d.tar +1.4G ./d.tar +$ ./checksize ~/d.tar +inspecting "/home/vbatts/d.tar" (size 1420749k) + -- number of files: 38718 + -- size of metadata uncompressed: 43261k + -- size of gzip compressed metadata: 2251k +``` + +Here, an archive with 38,718 files has a compressed footprint of about 2mb. + +Rolling the null bytes on the end of the archive, we will assume a +bytes-per-file rate for the storage implications. + +| uncompressed | compressed | +| :----------: | :--------: | +| ~ 1kb per/file | 0.06kb per/file | + + +What's Next? +------------ + +* More implementations of storage Packer and Unpacker + - could be a redis or mongo backend +* More implementations of FileGetter and FilePutter + - could be a redis or mongo backend +* cli tooling to assemble/disassemble a provided tar archive +* would be interesting to have an assembler stream that implements `io.Seeker` + +License +------- + +See LICENSE + + diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go new file mode 100644 index 0000000000..e363aa793e --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/common.go @@ -0,0 +1,305 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package tar implements access to tar archives. +// It aims to cover most of the variations, including those produced +// by GNU and BSD tars. +// +// References: +// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 +// http://www.gnu.org/software/tar/manual/html_node/Standard.html +// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html +package tar + +import ( + "bytes" + "errors" + "fmt" + "os" + "path" + "time" +) + +const ( + blockSize = 512 + + // Types + TypeReg = '0' // regular file + TypeRegA = '\x00' // regular file + TypeLink = '1' // hard link + TypeSymlink = '2' // symbolic link + TypeChar = '3' // character device node + TypeBlock = '4' // block device node + TypeDir = '5' // directory + TypeFifo = '6' // fifo node + TypeCont = '7' // reserved + TypeXHeader = 'x' // extended header + TypeXGlobalHeader = 'g' // global extended header + TypeGNULongName = 'L' // Next file has a long name + TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name + TypeGNUSparse = 'S' // sparse file +) + +// A Header represents a single header in a tar archive. +// Some fields may not be populated. +type Header struct { + Name string // name of header file entry + Mode int64 // permission and mode bits + Uid int // user id of owner + Gid int // group id of owner + Size int64 // length in bytes + ModTime time.Time // modified time + Typeflag byte // type of header entry + Linkname string // target name of link + Uname string // user name of owner + Gname string // group name of owner + Devmajor int64 // major number of character or block device + Devminor int64 // minor number of character or block device + AccessTime time.Time // access time + ChangeTime time.Time // status change time + Xattrs map[string]string +} + +// File name constants from the tar spec. +const ( + fileNameSize = 100 // Maximum number of bytes in a standard tar name. + fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. +) + +// FileInfo returns an os.FileInfo for the Header. +func (h *Header) FileInfo() os.FileInfo { + return headerFileInfo{h} +} + +// headerFileInfo implements os.FileInfo. +type headerFileInfo struct { + h *Header +} + +func (fi headerFileInfo) Size() int64 { return fi.h.Size } +func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() } +func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime } +func (fi headerFileInfo) Sys() interface{} { return fi.h } + +// Name returns the base name of the file. +func (fi headerFileInfo) Name() string { + if fi.IsDir() { + return path.Base(path.Clean(fi.h.Name)) + } + return path.Base(fi.h.Name) +} + +// Mode returns the permission and mode bits for the headerFileInfo. +func (fi headerFileInfo) Mode() (mode os.FileMode) { + // Set file permission bits. + mode = os.FileMode(fi.h.Mode).Perm() + + // Set setuid, setgid and sticky bits. + if fi.h.Mode&c_ISUID != 0 { + // setuid + mode |= os.ModeSetuid + } + if fi.h.Mode&c_ISGID != 0 { + // setgid + mode |= os.ModeSetgid + } + if fi.h.Mode&c_ISVTX != 0 { + // sticky + mode |= os.ModeSticky + } + + // Set file mode bits. + // clear perm, setuid, setgid and sticky bits. + m := os.FileMode(fi.h.Mode) &^ 07777 + if m == c_ISDIR { + // directory + mode |= os.ModeDir + } + if m == c_ISFIFO { + // named pipe (FIFO) + mode |= os.ModeNamedPipe + } + if m == c_ISLNK { + // symbolic link + mode |= os.ModeSymlink + } + if m == c_ISBLK { + // device file + mode |= os.ModeDevice + } + if m == c_ISCHR { + // Unix character device + mode |= os.ModeDevice + mode |= os.ModeCharDevice + } + if m == c_ISSOCK { + // Unix domain socket + mode |= os.ModeSocket + } + + switch fi.h.Typeflag { + case TypeLink, TypeSymlink: + // hard link, symbolic link + mode |= os.ModeSymlink + case TypeChar: + // character device node + mode |= os.ModeDevice + mode |= os.ModeCharDevice + case TypeBlock: + // block device node + mode |= os.ModeDevice + case TypeDir: + // directory + mode |= os.ModeDir + case TypeFifo: + // fifo node + mode |= os.ModeNamedPipe + } + + return mode +} + +// sysStat, if non-nil, populates h from system-dependent fields of fi. +var sysStat func(fi os.FileInfo, h *Header) error + +// Mode constants from the tar spec. +const ( + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + c_ISDIR = 040000 // Directory + c_ISFIFO = 010000 // FIFO + c_ISREG = 0100000 // Regular file + c_ISLNK = 0120000 // Symbolic link + c_ISBLK = 060000 // Block special file + c_ISCHR = 020000 // Character special file + c_ISSOCK = 0140000 // Socket +) + +// Keywords for the PAX Extended Header +const ( + paxAtime = "atime" + paxCharset = "charset" + paxComment = "comment" + paxCtime = "ctime" // please note that ctime is not a valid pax header. + paxGid = "gid" + paxGname = "gname" + paxLinkpath = "linkpath" + paxMtime = "mtime" + paxPath = "path" + paxSize = "size" + paxUid = "uid" + paxUname = "uname" + paxXattr = "SCHILY.xattr." + paxNone = "" +) + +// FileInfoHeader creates a partially-populated Header from fi. +// If fi describes a symlink, FileInfoHeader records link as the link target. +// If fi describes a directory, a slash is appended to the name. +// Because os.FileInfo's Name method returns only the base name of +// the file it describes, it may be necessary to modify the Name field +// of the returned header to provide the full path name of the file. +func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { + if fi == nil { + return nil, errors.New("tar: FileInfo is nil") + } + fm := fi.Mode() + h := &Header{ + Name: fi.Name(), + ModTime: fi.ModTime(), + Mode: int64(fm.Perm()), // or'd with c_IS* constants later + } + switch { + case fm.IsRegular(): + h.Mode |= c_ISREG + h.Typeflag = TypeReg + h.Size = fi.Size() + case fi.IsDir(): + h.Typeflag = TypeDir + h.Mode |= c_ISDIR + h.Name += "/" + case fm&os.ModeSymlink != 0: + h.Typeflag = TypeSymlink + h.Mode |= c_ISLNK + h.Linkname = link + case fm&os.ModeDevice != 0: + if fm&os.ModeCharDevice != 0 { + h.Mode |= c_ISCHR + h.Typeflag = TypeChar + } else { + h.Mode |= c_ISBLK + h.Typeflag = TypeBlock + } + case fm&os.ModeNamedPipe != 0: + h.Typeflag = TypeFifo + h.Mode |= c_ISFIFO + case fm&os.ModeSocket != 0: + h.Mode |= c_ISSOCK + default: + return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + } + if fm&os.ModeSetuid != 0 { + h.Mode |= c_ISUID + } + if fm&os.ModeSetgid != 0 { + h.Mode |= c_ISGID + } + if fm&os.ModeSticky != 0 { + h.Mode |= c_ISVTX + } + if sysStat != nil { + return h, sysStat(fi, h) + } + return h, nil +} + +var zeroBlock = make([]byte, blockSize) + +// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. +// We compute and return both. +func checksum(header []byte) (unsigned int64, signed int64) { + for i := 0; i < len(header); i++ { + if i == 148 { + // The chksum field (header[148:156]) is special: it should be treated as space bytes. + unsigned += ' ' * 8 + signed += ' ' * 8 + i += 7 + continue + } + unsigned += int64(header[i]) + signed += int64(int8(header[i])) + } + return +} + +type slicer []byte + +func (sp *slicer) next(n int) (b []byte) { + s := *sp + b, *sp = s[0:n], s[n:] + return +} + +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 { + return false + } + } + return true +} + +func toASCII(s string) string { + if isASCII(s) { + return s + } + var buf bytes.Buffer + for _, c := range s { + if c < 0x80 { + buf.WriteByte(byte(c)) + } + } + return buf.String() +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/example_test.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/example_test.go new file mode 100644 index 0000000000..2317f44e96 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/example_test.go @@ -0,0 +1,80 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar_test + +import ( + "archive/tar" + "bytes" + "fmt" + "io" + "log" + "os" +) + +func Example() { + // Create a buffer to write our archive to. + buf := new(bytes.Buffer) + + // Create a new tar archive. + tw := tar.NewWriter(buf) + + // Add some files to the archive. + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nGeorge\nGeoffrey\nGonzo"}, + {"todo.txt", "Get animal handling licence."}, + } + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + if err := tw.WriteHeader(hdr); err != nil { + log.Fatalln(err) + } + if _, err := tw.Write([]byte(file.Body)); err != nil { + log.Fatalln(err) + } + } + // Make sure to check the error on Close. + if err := tw.Close(); err != nil { + log.Fatalln(err) + } + + // Open the tar archive for reading. + r := bytes.NewReader(buf.Bytes()) + tr := tar.NewReader(r) + + // Iterate through the files in the archive. + for { + hdr, err := tr.Next() + if err == io.EOF { + // end of tar archive + break + } + if err != nil { + log.Fatalln(err) + } + fmt.Printf("Contents of %s:\n", hdr.Name) + if _, err := io.Copy(os.Stdout, tr); err != nil { + log.Fatalln(err) + } + fmt.Println() + } + + // Output: + // Contents of readme.txt: + // This archive contains some text files. + // Contents of gopher.txt: + // Gopher names: + // George + // Geoffrey + // Gonzo + // Contents of todo.txt: + // Get animal handling licence. +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go new file mode 100644 index 0000000000..a89957ed90 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader.go @@ -0,0 +1,926 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - pax extensions + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "os" + "strconv" + "strings" + "time" +) + +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") +) + +const maxNanoSecondIntSize = 9 + +// A Reader provides sequential access to the contents of a tar archive. +// A tar archive consists of a sequence of files. +// The Next method advances to the next file in the archive (including the first), +// and then it can be treated as an io.Reader to access the file's data. +type Reader struct { + r io.Reader + err error + pad int64 // amount of padding (ignored) after current file entry + curr numBytesReader // reader for current file entry + hdrBuff [blockSize]byte // buffer to use in readHeader + + RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. + rawBytes *bytes.Buffer // last raw bits +} + +// RawBytes accesses the raw bytes of the archive, apart from the file payload itself. +// This includes the header and padding. +// +// This call resets the current rawbytes buffer +// +// Only when RawAccounting is enabled, otherwise this returns nil +func (tr *Reader) RawBytes() []byte { + if !tr.RawAccounting { + return nil + } + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } + // if we've read them, then flush them. + defer tr.rawBytes.Reset() + return tr.rawBytes.Bytes() +} + +// A numBytesReader is an io.Reader with a numBytes method, returning the number +// of bytes remaining in the underlying encoded data. +type numBytesReader interface { + io.Reader + numBytes() int64 +} + +// A regFileReader is a numBytesReader for reading file data from a tar archive. +type regFileReader struct { + r io.Reader // underlying reader + nb int64 // number of unread bytes for current file entry +} + +// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive. +type sparseFileReader struct { + rfr *regFileReader // reads the sparse-encoded file data + sp []sparseEntry // the sparse map for the file + pos int64 // keeps track of file position + tot int64 // total size of the file +} + +// Keywords for GNU sparse files in a PAX extended header +const ( + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// Keywords for old GNU sparse headers +const ( + oldGNUSparseMainHeaderOffset = 386 + oldGNUSparseMainHeaderIsExtendedOffset = 482 + oldGNUSparseMainHeaderNumEntries = 4 + oldGNUSparseExtendedHeaderIsExtendedOffset = 504 + oldGNUSparseExtendedHeaderNumEntries = 21 + oldGNUSparseOffsetSize = 12 + oldGNUSparseNumBytesSize = 12 +) + +// NewReader creates a new Reader reading from r. +func NewReader(r io.Reader) *Reader { return &Reader{r: r} } + +// Next advances to the next entry in the tar archive. +// +// io.EOF is returned at the end of the input. +func (tr *Reader) Next() (*Header, error) { + var hdr *Header + if tr.RawAccounting { + if tr.rawBytes == nil { + tr.rawBytes = bytes.NewBuffer(nil) + } else { + tr.rawBytes.Reset() + } + } + if tr.err == nil { + tr.skipUnread() + } + if tr.err != nil { + return hdr, tr.err + } + hdr = tr.readHeader() + if hdr == nil { + return hdr, tr.err + } + // Check for PAX/GNU header. + switch hdr.Typeflag { + case TypeXHeader: + // PAX extended header + headers, err := parsePAX(tr) + if err != nil { + return nil, err + } + // We actually read the whole file, + // but this skips alignment padding + tr.skipUnread() + hdr = tr.readHeader() + mergePAX(hdr, headers) + + // Check for a PAX format sparse file + sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers) + if err != nil { + tr.err = err + return nil, err + } + if sp != nil { + // Current file is a PAX format GNU sparse file. + // Set the current file reader to a sparse file reader. + tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + } + return hdr, nil + case TypeGNULongName: + // We have a GNU long name header. Its contents are the real file name. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + var b []byte + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(realname); err != nil { + return nil, err + } + b = tr.RawBytes() + } + hdr, err := tr.Next() + // since the above call to Next() resets the buffer, we need to throw the bytes over + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b); err != nil { + return nil, err + } + } + hdr.Name = cString(realname) + return hdr, err + case TypeGNULongLink: + // We have a GNU long link header. + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err + } + var b []byte + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(realname); err != nil { + return nil, err + } + b = tr.RawBytes() + } + hdr, err := tr.Next() + // since the above call to Next() resets the buffer, we need to throw the bytes over + if tr.RawAccounting { + if _, err = tr.rawBytes.Write(b); err != nil { + return nil, err + } + } + hdr.Linkname = cString(realname) + return hdr, err + } + return hdr, tr.err +} + +// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then +// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to +// be treated as a regular file. +func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { + var sparseFormat string + + // Check for sparse format indicators + major, majorOk := headers[paxGNUSparseMajor] + minor, minorOk := headers[paxGNUSparseMinor] + sparseName, sparseNameOk := headers[paxGNUSparseName] + _, sparseMapOk := headers[paxGNUSparseMap] + sparseSize, sparseSizeOk := headers[paxGNUSparseSize] + sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] + + // Identify which, if any, sparse format applies from which PAX headers are set + if majorOk && minorOk { + sparseFormat = major + "." + minor + } else if sparseNameOk && sparseMapOk { + sparseFormat = "0.1" + } else if sparseSizeOk { + sparseFormat = "0.0" + } else { + // Not a PAX format GNU sparse file. + return nil, nil + } + + // Check for unknown sparse format + if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { + return nil, nil + } + + // Update hdr from GNU sparse PAX headers + if sparseNameOk { + hdr.Name = sparseName + } + if sparseSizeOk { + realSize, err := strconv.ParseInt(sparseSize, 10, 0) + if err != nil { + return nil, ErrHeader + } + hdr.Size = realSize + } else if sparseRealSizeOk { + realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) + if err != nil { + return nil, ErrHeader + } + hdr.Size = realSize + } + + // Set up the sparse map, according to the particular sparse format in use + var sp []sparseEntry + var err error + switch sparseFormat { + case "0.0", "0.1": + sp, err = readGNUSparseMap0x1(headers) + case "1.0": + sp, err = readGNUSparseMap1x0(tr.curr) + } + return sp, err +} + +// mergePAX merges well known headers according to PAX standard. +// In general headers with the same name as those found +// in the header struct overwrite those found in the header +// struct with higher precision or longer values. Esp. useful +// for name and linkname fields. +func mergePAX(hdr *Header, headers map[string]string) error { + for k, v := range headers { + switch k { + case paxPath: + hdr.Name = v + case paxLinkpath: + hdr.Linkname = v + case paxGname: + hdr.Gname = v + case paxUname: + hdr.Uname = v + case paxUid: + uid, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Uid = int(uid) + case paxGid: + gid, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Gid = int(gid) + case paxAtime: + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.AccessTime = t + case paxMtime: + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.ModTime = t + case paxCtime: + t, err := parsePAXTime(v) + if err != nil { + return err + } + hdr.ChangeTime = t + case paxSize: + size, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return err + } + hdr.Size = int64(size) + default: + if strings.HasPrefix(k, paxXattr) { + if hdr.Xattrs == nil { + hdr.Xattrs = make(map[string]string) + } + hdr.Xattrs[k[len(paxXattr):]] = v + } + } + } + return nil +} + +// parsePAXTime takes a string of the form %d.%d as described in +// the PAX specification. +func parsePAXTime(t string) (time.Time, error) { + buf := []byte(t) + pos := bytes.IndexByte(buf, '.') + var seconds, nanoseconds int64 + var err error + if pos == -1 { + seconds, err = strconv.ParseInt(t, 10, 0) + if err != nil { + return time.Time{}, err + } + } else { + seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) + if err != nil { + return time.Time{}, err + } + nano_buf := string(buf[pos+1:]) + // Pad as needed before converting to a decimal. + // For example .030 -> .030000000 -> 30000000 nanoseconds + if len(nano_buf) < maxNanoSecondIntSize { + // Right pad + nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) + } else if len(nano_buf) > maxNanoSecondIntSize { + // Right truncate + nano_buf = nano_buf[:maxNanoSecondIntSize] + } + nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) + if err != nil { + return time.Time{}, err + } + } + ts := time.Unix(seconds, nanoseconds) + return ts, nil +} + +// parsePAX parses PAX headers. +// If an extended header (type 'x') is invalid, ErrHeader is returned +func parsePAX(r io.Reader) (map[string]string, error) { + buf, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err = tr.rawBytes.Write(buf); err != nil { + return nil, err + } + } + + // For GNU PAX sparse format 0.0 support. + // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. + var sparseMap bytes.Buffer + + headers := make(map[string]string) + // Each record is constructed as + // "%d %s=%s\n", length, keyword, value + for len(buf) > 0 { + // or the header was empty to start with. + var sp int + // The size field ends at the first space. + sp = bytes.IndexByte(buf, ' ') + if sp == -1 { + return nil, ErrHeader + } + // Parse the first token as a decimal integer. + n, err := strconv.ParseInt(string(buf[:sp]), 10, 0) + if err != nil { + return nil, ErrHeader + } + // Extract everything between the decimal and the n -1 on the + // beginning to eat the ' ', -1 on the end to skip the newline. + var record []byte + record, buf = buf[sp+1:n-1], buf[n:] + // The first equals is guaranteed to mark the end of the key. + // Everything else is value. + eq := bytes.IndexByte(record, '=') + if eq == -1 { + return nil, ErrHeader + } + key, value := record[:eq], record[eq+1:] + + keyStr := string(key) + if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { + // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. + sparseMap.Write(value) + sparseMap.Write([]byte{','}) + } else { + // Normal key. Set the value in the headers map. + headers[keyStr] = string(value) + } + } + if sparseMap.Len() != 0 { + // Add sparse info to headers, chopping off the extra comma + sparseMap.Truncate(sparseMap.Len() - 1) + headers[paxGNUSparseMap] = sparseMap.String() + } + return headers, nil +} + +// cString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func cString(b []byte) string { + n := 0 + for n < len(b) && b[n] != 0 { + n++ + } + return string(b[0:n]) +} + +func (tr *Reader) octal(b []byte) int64 { + // Check for binary format first. + if len(b) > 0 && b[0]&0x80 != 0 { + var x int64 + for i, c := range b { + if i == 0 { + c &= 0x7f // ignore signal bit in first byte + } + x = x<<8 | int64(c) + } + return x + } + + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, err := strconv.ParseUint(cString(b), 8, 64) + if err != nil { + tr.err = err + } + return int64(x) +} + +// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding. +func (tr *Reader) skipUnread() { + nr := tr.numBytes() + tr.pad // number of bytes to skip + tr.curr, tr.pad = nil, 0 + if tr.RawAccounting { + _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr) + return + } + if sr, ok := tr.r.(io.Seeker); ok { + if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil { + return + } + } + _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr) +} + +func (tr *Reader) verifyChecksum(header []byte) bool { + if tr.err != nil { + return false + } + + given := tr.octal(header[148:156]) + unsigned, signed := checksum(header) + return given == unsigned || given == signed +} + +func (tr *Reader) readHeader() *Header { + header := tr.hdrBuff[:] + copy(header, zeroBlock) + + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + // because it could read some of the block, but reach EOF first + if tr.err == io.EOF && tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil + } + } + return nil + } + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil + } + } + + // Two blocks of zero bytes marks the end of the archive. + if bytes.Equal(header, zeroBlock[0:blockSize]) { + if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil { + // because it could read some of the block, but reach EOF first + if tr.err == io.EOF && tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil + } + } + return nil + } + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { + return nil + } + } + if bytes.Equal(header, zeroBlock[0:blockSize]) { + tr.err = io.EOF + } else { + tr.err = ErrHeader // zero block and then non-zero block + } + return nil + } + + if !tr.verifyChecksum(header) { + tr.err = ErrHeader + return nil + } + + // Unpack + hdr := new(Header) + s := slicer(header) + + hdr.Name = cString(s.next(100)) + hdr.Mode = tr.octal(s.next(8)) + hdr.Uid = int(tr.octal(s.next(8))) + hdr.Gid = int(tr.octal(s.next(8))) + hdr.Size = tr.octal(s.next(12)) + hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0) + s.next(8) // chksum + hdr.Typeflag = s.next(1)[0] + hdr.Linkname = cString(s.next(100)) + + // The remainder of the header depends on the value of magic. + // The original (v7) version of tar had no explicit magic field, + // so its magic bytes, like the rest of the block, are NULs. + magic := string(s.next(8)) // contains version field as well. + var format string + switch { + case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) + if string(header[508:512]) == "tar\x00" { + format = "star" + } else { + format = "posix" + } + case magic == "ustar \x00": // old GNU tar + format = "gnu" + } + + switch format { + case "posix", "gnu", "star": + hdr.Uname = cString(s.next(32)) + hdr.Gname = cString(s.next(32)) + devmajor := s.next(8) + devminor := s.next(8) + if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { + hdr.Devmajor = tr.octal(devmajor) + hdr.Devminor = tr.octal(devminor) + } + var prefix string + switch format { + case "posix", "gnu": + prefix = cString(s.next(155)) + case "star": + prefix = cString(s.next(131)) + hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0) + hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0) + } + if len(prefix) > 0 { + hdr.Name = prefix + "/" + hdr.Name + } + } + + if tr.err != nil { + tr.err = ErrHeader + return nil + } + + // Maximum value of hdr.Size is 64 GB (12 octal digits), + // so there's no risk of int64 overflowing. + nb := int64(hdr.Size) + tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + + // Set the current file reader. + tr.curr = ®FileReader{r: tr.r, nb: nb} + + // Check for old GNU sparse format entry. + if hdr.Typeflag == TypeGNUSparse { + // Get the real size of the file. + hdr.Size = tr.octal(header[483:495]) + + // Read the sparse map. + sp := tr.readOldGNUSparseMap(header) + if tr.err != nil { + return nil + } + // Current file is a GNU sparse file. Update the current file reader. + tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size} + } + + return hdr +} + +// A sparseEntry holds a single entry in a sparse file's sparse map. +// A sparse entry indicates the offset and size in a sparse file of a +// block of data. +type sparseEntry struct { + offset int64 + numBytes int64 +} + +// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, +// then one or more extension headers are used to store the rest of the sparse map. +func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { + isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 + spCap := oldGNUSparseMainHeaderNumEntries + if isExtended { + spCap += oldGNUSparseExtendedHeaderNumEntries + } + sp := make([]sparseEntry, 0, spCap) + s := slicer(header[oldGNUSparseMainHeaderOffset:]) + + // Read the four entries from the main tar header + for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { + offset := tr.octal(s.next(oldGNUSparseOffsetSize)) + numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) + if tr.err != nil { + tr.err = ErrHeader + return nil + } + if offset == 0 && numBytes == 0 { + break + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + + for isExtended { + // There are more entries. Read an extension header and parse its entries. + sparseHeader := make([]byte, blockSize) + if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { + return nil + } + if tr.RawAccounting { + if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil { + return nil + } + } + + isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 + s = slicer(sparseHeader) + for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { + offset := tr.octal(s.next(oldGNUSparseOffsetSize)) + numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize)) + if tr.err != nil { + tr.err = ErrHeader + return nil + } + if offset == 0 && numBytes == 0 { + break + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + } + return sp +} + +// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0. +// The sparse map is stored just before the file data and padded out to the nearest block boundary. +func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { + buf := make([]byte, 2*blockSize) + sparseHeader := buf[:blockSize] + + // readDecimal is a helper function to read a decimal integer from the sparse map + // while making sure to read from the file in blocks of size blockSize + readDecimal := func() (int64, error) { + // Look for newline + nl := bytes.IndexByte(sparseHeader, '\n') + if nl == -1 { + if len(sparseHeader) >= blockSize { + // This is an error + return 0, ErrHeader + } + oldLen := len(sparseHeader) + newLen := oldLen + blockSize + if cap(sparseHeader) < newLen { + // There's more header, but we need to make room for the next block + copy(buf, sparseHeader) + sparseHeader = buf[:newLen] + } else { + // There's more header, and we can just reslice + sparseHeader = sparseHeader[:newLen] + } + + // Now that sparseHeader is large enough, read next block + if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil { + return 0, err + } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil { + return 0, err + } + } + + // Look for a newline in the new data + nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n') + if nl == -1 { + // This is an error + return 0, ErrHeader + } + nl += oldLen // We want the position from the beginning + } + // Now that we've found a newline, read a number + n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0) + if err != nil { + return 0, ErrHeader + } + + // Update sparseHeader to consume this number + sparseHeader = sparseHeader[nl+1:] + return n, nil + } + + // Read the first block + if _, err := io.ReadFull(r, sparseHeader); err != nil { + return nil, err + } + // leaving this function for io.Reader makes it more testable + if tr, ok := r.(*Reader); ok && tr.RawAccounting { + if _, err := tr.rawBytes.Write(sparseHeader); err != nil { + return nil, err + } + } + + // The first line contains the number of entries + numEntries, err := readDecimal() + if err != nil { + return nil, err + } + + // Read all the entries + sp := make([]sparseEntry, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + // Read the offset + offset, err := readDecimal() + if err != nil { + return nil, err + } + // Read numBytes + numBytes, err := readDecimal() + if err != nil { + return nil, err + } + + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + + return sp, nil +} + +// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1. +// The sparse map is stored in the PAX headers. +func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) { + // Get number of entries + numEntriesStr, ok := headers[paxGNUSparseNumBlocks] + if !ok { + return nil, ErrHeader + } + numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) + if err != nil { + return nil, ErrHeader + } + + sparseMap := strings.Split(headers[paxGNUSparseMap], ",") + + // There should be two numbers in sparseMap for each entry + if int64(len(sparseMap)) != 2*numEntries { + return nil, ErrHeader + } + + // Loop through the entries in the sparse map + sp := make([]sparseEntry, 0, numEntries) + for i := int64(0); i < numEntries; i++ { + offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0) + if err != nil { + return nil, ErrHeader + } + numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0) + if err != nil { + return nil, ErrHeader + } + sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + } + + return sp, nil +} + +// numBytes returns the number of bytes left to read in the current file's entry +// in the tar archive, or 0 if there is no current file. +func (tr *Reader) numBytes() int64 { + if tr.curr == nil { + // No current file, so no bytes + return 0 + } + return tr.curr.numBytes() +} + +// Read reads from the current entry in the tar archive. +// It returns 0, io.EOF when it reaches the end of that entry, +// until Next is called to advance to the next entry. +func (tr *Reader) Read(b []byte) (n int, err error) { + if tr.curr == nil { + return 0, io.EOF + } + n, err = tr.curr.Read(b) + if err != nil && err != io.EOF { + tr.err = err + } + return +} + +func (rfr *regFileReader) Read(b []byte) (n int, err error) { + if rfr.nb == 0 { + // file consumed + return 0, io.EOF + } + if int64(len(b)) > rfr.nb { + b = b[0:rfr.nb] + } + n, err = rfr.r.Read(b) + rfr.nb -= int64(n) + + if err == io.EOF && rfr.nb > 0 { + err = io.ErrUnexpectedEOF + } + return +} + +// numBytes returns the number of bytes left to read in the file's data in the tar archive. +func (rfr *regFileReader) numBytes() int64 { + return rfr.nb +} + +// readHole reads a sparse file hole ending at offset toOffset +func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int { + n64 := toOffset - sfr.pos + if n64 > int64(len(b)) { + n64 = int64(len(b)) + } + n := int(n64) + for i := 0; i < n; i++ { + b[i] = 0 + } + sfr.pos += n64 + return n +} + +// Read reads the sparse file data in expanded form. +func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { + if len(sfr.sp) == 0 { + // No more data fragments to read from. + if sfr.pos < sfr.tot { + // We're in the last hole + n = sfr.readHole(b, sfr.tot) + return + } + // Otherwise, we're at the end of the file + return 0, io.EOF + } + if sfr.pos < sfr.sp[0].offset { + // We're in a hole + n = sfr.readHole(b, sfr.sp[0].offset) + return + } + + // We're not in a hole, so we'll read from the next data fragment + posInFragment := sfr.pos - sfr.sp[0].offset + bytesLeft := sfr.sp[0].numBytes - posInFragment + if int64(len(b)) > bytesLeft { + b = b[0:bytesLeft] + } + + n, err = sfr.rfr.Read(b) + sfr.pos += int64(n) + + if int64(n) == bytesLeft { + // We're done with this fragment + sfr.sp = sfr.sp[1:] + } + + if err == io.EOF && sfr.pos < sfr.tot { + // We reached the end of the last fragment's data, but there's a final hole + err = nil + } + return +} + +// numBytes returns the number of bytes left to read in the sparse file's +// sparse-encoded data in the tar archive. +func (sfr *sparseFileReader) numBytes() int64 { + return sfr.rfr.nb +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/reader_test.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader_test.go new file mode 100644 index 0000000000..9601ffe459 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/reader_test.go @@ -0,0 +1,743 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "crypto/md5" + "fmt" + "io" + "io/ioutil" + "os" + "reflect" + "strings" + "testing" + "time" +) + +type untarTest struct { + file string + headers []*Header + cksums []string +} + +var gnuTarTest = &untarTest{ + file: "testdata/gnu.tar", + headers: []*Header{ + { + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244428340, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244436044, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + }, + cksums: []string{ + "e38b27eaccb4391bdec553a7f3ae6b2f", + "c65bd2e50a56a2138bf1716f2fd56fe9", + }, +} + +var sparseTarTest = &untarTest{ + file: "testdata/sparse-formats.tar", + headers: []*Header{ + { + Name: "sparse-gnu", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392395740, 0), + Typeflag: 0x53, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + }, + { + Name: "sparse-posix-0.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392342187, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + }, + { + Name: "sparse-posix-0.1", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392340456, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + }, + { + Name: "sparse-posix-1.0", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 200, + ModTime: time.Unix(1392337404, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + }, + { + Name: "end", + Mode: 420, + Uid: 1000, + Gid: 1000, + Size: 4, + ModTime: time.Unix(1392398319, 0), + Typeflag: 0x30, + Linkname: "", + Uname: "david", + Gname: "david", + Devmajor: 0, + Devminor: 0, + }, + }, + cksums: []string{ + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "6f53234398c2449fe67c1812d993012f", + "b0061974914468de549a2af8ced10316", + }, +} + +var untarTests = []*untarTest{ + gnuTarTest, + sparseTarTest, + { + file: "testdata/star.tar", + headers: []*Header{ + { + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, + { + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244592783, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + AccessTime: time.Unix(1244592783, 0), + ChangeTime: time.Unix(1244592783, 0), + }, + }, + }, + { + file: "testdata/v7.tar", + headers: []*Header{ + { + Name: "small.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1244593104, 0), + Typeflag: '\x00', + }, + { + Name: "small2.txt", + Mode: 0444, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1244593104, 0), + Typeflag: '\x00', + }, + }, + }, + { + file: "testdata/pax.tar", + headers: []*Header{ + { + Name: "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + Mode: 0664, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 7, + ModTime: time.Unix(1350244992, 23960108), + ChangeTime: time.Unix(1350244992, 23960108), + AccessTime: time.Unix(1350244992, 23960108), + Typeflag: TypeReg, + }, + { + Name: "a/b", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Uname: "shane", + Gname: "shane", + Size: 0, + ModTime: time.Unix(1350266320, 910238425), + ChangeTime: time.Unix(1350266320, 910238425), + AccessTime: time.Unix(1350266320, 910238425), + Typeflag: TypeSymlink, + Linkname: "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100", + }, + }, + }, + { + file: "testdata/nil-uid.tar", // golang.org/issue/5290 + headers: []*Header{ + { + Name: "P1050238.JPG.log", + Mode: 0664, + Uid: 0, + Gid: 0, + Size: 14, + ModTime: time.Unix(1365454838, 0), + Typeflag: TypeReg, + Linkname: "", + Uname: "eyefi", + Gname: "eyefi", + Devmajor: 0, + Devminor: 0, + }, + }, + }, + { + file: "testdata/xattrs.tar", + headers: []*Header{ + { + Name: "small.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 5, + ModTime: time.Unix(1386065770, 448252320), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1389782956, 794414986), + Xattrs: map[string]string{ + "user.key": "value", + "user.key2": "value2", + // Interestingly, selinux encodes the terminating null inside the xattr + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + }, + { + Name: "small2.txt", + Mode: 0644, + Uid: 1000, + Gid: 10, + Size: 11, + ModTime: time.Unix(1386065770, 449252304), + Typeflag: '0', + Uname: "alex", + Gname: "wheel", + AccessTime: time.Unix(1389782991, 419875220), + ChangeTime: time.Unix(1386065770, 449252304), + Xattrs: map[string]string{ + "security.selinux": "unconfined_u:object_r:default_t:s0\x00", + }, + }, + }, + }, +} + +func TestReader(t *testing.T) { +testLoop: + for i, test := range untarTests { + f, err := os.Open(test.file) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + defer f.Close() + tr := NewReader(f) + for j, header := range test.headers { + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Errorf("test %d, entry %d: Didn't get entry: %v", i, j, err) + f.Close() + continue testLoop + } + if !reflect.DeepEqual(*hdr, *header) { + t.Errorf("test %d, entry %d: Incorrect header:\nhave %+v\nwant %+v", + i, j, *hdr, *header) + } + } + hdr, err := tr.Next() + if err == io.EOF { + continue testLoop + } + if hdr != nil || err != nil { + t.Errorf("test %d: Unexpected entry or error: hdr=%v err=%v", i, hdr, err) + } + } +} + +func TestPartialRead(t *testing.T) { + f, err := os.Open("testdata/gnu.tar") + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + // Read the first four bytes; Next() should skip the last byte. + hdr, err := tr.Next() + if err != nil || hdr == nil { + t.Fatalf("Didn't get first file: %v", err) + } + buf := make([]byte, 4) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if expected := []byte("Kilt"); !bytes.Equal(buf, expected) { + t.Errorf("Contents = %v, want %v", buf, expected) + } + + // Second file + hdr, err = tr.Next() + if err != nil || hdr == nil { + t.Fatalf("Didn't get second file: %v", err) + } + buf = make([]byte, 6) + if _, err := io.ReadFull(tr, buf); err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if expected := []byte("Google"); !bytes.Equal(buf, expected) { + t.Errorf("Contents = %v, want %v", buf, expected) + } +} + +func TestIncrementalRead(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + headers := test.headers + cksums := test.cksums + nread := 0 + + // loop over all files + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == io.EOF { + break + } + + // check the header + if !reflect.DeepEqual(*hdr, *headers[nread]) { + t.Errorf("Incorrect header:\nhave %+v\nwant %+v", + *hdr, headers[nread]) + } + + // read file contents in little chunks EOF, + // checksumming all the way + h := md5.New() + rdbuf := make([]uint8, 8) + for { + nr, err := tr.Read(rdbuf) + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Read: unexpected error %v\n", err) + break + } + h.Write(rdbuf[0:nr]) + } + // verify checksum + have := fmt.Sprintf("%x", h.Sum(nil)) + want := cksums[nread] + if want != have { + t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) + } + } + if nread != len(headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) + } +} + +func TestNonSeekable(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + type readerOnly struct { + io.Reader + } + tr := NewReader(readerOnly{f}) + nread := 0 + + for ; ; nread++ { + _, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + } + + if nread != len(test.headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread) + } +} + +func TestParsePAXHeader(t *testing.T) { + paxTests := [][3]string{ + {"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths + {"a", "a=name", "9 a=name\n"}, // Test case involving multiple acceptable length + {"mtime", "mtime=1350244992.023960108", "30 mtime=1350244992.023960108\n"}} + for _, test := range paxTests { + key, expected, raw := test[0], test[1], test[2] + reader := bytes.NewReader([]byte(raw)) + headers, err := parsePAX(reader) + if err != nil { + t.Errorf("Couldn't parse correctly formatted headers: %v", err) + continue + } + if strings.EqualFold(headers[key], expected) { + t.Errorf("mtime header incorrectly parsed: got %s, wanted %s", headers[key], expected) + continue + } + trailer := make([]byte, 100) + n, err := reader.Read(trailer) + if err != io.EOF || n != 0 { + t.Error("Buffer wasn't consumed") + } + } + badHeader := bytes.NewReader([]byte("3 somelongkey=")) + if _, err := parsePAX(badHeader); err != ErrHeader { + t.Fatal("Unexpected success when parsing bad header") + } +} + +func TestParsePAXTime(t *testing.T) { + // Some valid PAX time values + timestamps := map[string]time.Time{ + "1350244992.023960108": time.Unix(1350244992, 23960108), // The common case + "1350244992.02396010": time.Unix(1350244992, 23960100), // Lower precision value + "1350244992.0239601089": time.Unix(1350244992, 23960108), // Higher precision value + "1350244992": time.Unix(1350244992, 0), // Low precision value + } + for input, expected := range timestamps { + ts, err := parsePAXTime(input) + if err != nil { + t.Fatal(err) + } + if !ts.Equal(expected) { + t.Fatalf("Time parsing failure %s %s", ts, expected) + } + } +} + +func TestMergePAX(t *testing.T) { + hdr := new(Header) + // Test a string, integer, and time based value. + headers := map[string]string{ + "path": "a/b/c", + "uid": "1000", + "mtime": "1350244992.023960108", + } + err := mergePAX(hdr, headers) + if err != nil { + t.Fatal(err) + } + want := &Header{ + Name: "a/b/c", + Uid: 1000, + ModTime: time.Unix(1350244992, 23960108), + } + if !reflect.DeepEqual(hdr, want) { + t.Errorf("incorrect merge: got %+v, want %+v", hdr, want) + } +} + +func TestSparseEndToEnd(t *testing.T) { + test := sparseTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + + headers := test.headers + cksums := test.cksums + nread := 0 + + // loop over all files + for ; ; nread++ { + hdr, err := tr.Next() + if hdr == nil || err == io.EOF { + break + } + + // check the header + if !reflect.DeepEqual(*hdr, *headers[nread]) { + t.Errorf("Incorrect header:\nhave %+v\nwant %+v", + *hdr, headers[nread]) + } + + // read and checksum the file data + h := md5.New() + _, err = io.Copy(h, tr) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + // verify checksum + have := fmt.Sprintf("%x", h.Sum(nil)) + want := cksums[nread] + if want != have { + t.Errorf("Bad checksum on file %s:\nhave %+v\nwant %+v", hdr.Name, have, want) + } + } + if nread != len(headers) { + t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(headers), nread) + } +} + +type sparseFileReadTest struct { + sparseData []byte + sparseMap []sparseEntry + realSize int64 + expected []byte +} + +var sparseFileReadTests = []sparseFileReadTest{ + { + sparseData: []byte("abcde"), + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + realSize: 8, + expected: []byte("ab\x00\x00\x00cde"), + }, + { + sparseData: []byte("abcde"), + sparseMap: []sparseEntry{ + {offset: 0, numBytes: 2}, + {offset: 5, numBytes: 3}, + }, + realSize: 10, + expected: []byte("ab\x00\x00\x00cde\x00\x00"), + }, + { + sparseData: []byte("abcde"), + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + realSize: 8, + expected: []byte("\x00abc\x00\x00de"), + }, + { + sparseData: []byte("abcde"), + sparseMap: []sparseEntry{ + {offset: 1, numBytes: 3}, + {offset: 6, numBytes: 2}, + }, + realSize: 10, + expected: []byte("\x00abc\x00\x00de\x00\x00"), + }, + { + sparseData: []byte(""), + sparseMap: nil, + realSize: 2, + expected: []byte("\x00\x00"), + }, +} + +func TestSparseFileReader(t *testing.T) { + for i, test := range sparseFileReadTests { + r := bytes.NewReader(test.sparseData) + nb := int64(r.Len()) + sfr := &sparseFileReader{ + rfr: ®FileReader{r: r, nb: nb}, + sp: test.sparseMap, + pos: 0, + tot: test.realSize, + } + if sfr.numBytes() != nb { + t.Errorf("test %d: Before reading, sfr.numBytes() = %d, want %d", i, sfr.numBytes(), nb) + } + buf, err := ioutil.ReadAll(sfr) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + } + if e := test.expected; !bytes.Equal(buf, e) { + t.Errorf("test %d: Contents = %v, want %v", i, buf, e) + } + if sfr.numBytes() != 0 { + t.Errorf("test %d: After draining the reader, numBytes() was nonzero", i) + } + } +} + +func TestSparseIncrementalRead(t *testing.T) { + sparseMap := []sparseEntry{{10, 2}} + sparseData := []byte("Go") + expected := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00Go\x00\x00\x00\x00\x00\x00\x00\x00" + + r := bytes.NewReader(sparseData) + nb := int64(r.Len()) + sfr := &sparseFileReader{ + rfr: ®FileReader{r: r, nb: nb}, + sp: sparseMap, + pos: 0, + tot: int64(len(expected)), + } + + // We'll read the data 6 bytes at a time, with a hole of size 10 at + // the beginning and one of size 8 at the end. + var outputBuf bytes.Buffer + buf := make([]byte, 6) + for { + n, err := sfr.Read(buf) + if err == io.EOF { + break + } + if err != nil { + t.Errorf("Read: unexpected error %v\n", err) + } + if n > 0 { + _, err := outputBuf.Write(buf[:n]) + if err != nil { + t.Errorf("Write: unexpected error %v\n", err) + } + } + } + got := outputBuf.String() + if got != expected { + t.Errorf("Contents = %v, want %v", got, expected) + } +} + +func TestReadGNUSparseMap0x1(t *testing.T) { + headers := map[string]string{ + paxGNUSparseNumBlocks: "4", + paxGNUSparseMap: "0,5,10,5,20,5,30,5", + } + expected := []sparseEntry{ + {offset: 0, numBytes: 5}, + {offset: 10, numBytes: 5}, + {offset: 20, numBytes: 5}, + {offset: 30, numBytes: 5}, + } + + sp, err := readGNUSparseMap0x1(headers) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(sp, expected) { + t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + } +} + +func TestReadGNUSparseMap1x0(t *testing.T) { + // This test uses lots of holes so the sparse header takes up more than two blocks + numEntries := 100 + expected := make([]sparseEntry, 0, numEntries) + sparseMap := new(bytes.Buffer) + + fmt.Fprintf(sparseMap, "%d\n", numEntries) + for i := 0; i < numEntries; i++ { + offset := int64(2048 * i) + numBytes := int64(1024) + expected = append(expected, sparseEntry{offset: offset, numBytes: numBytes}) + fmt.Fprintf(sparseMap, "%d\n%d\n", offset, numBytes) + } + + // Make the header the smallest multiple of blockSize that fits the sparseMap + headerBlocks := (sparseMap.Len() + blockSize - 1) / blockSize + bufLen := blockSize * headerBlocks + buf := make([]byte, bufLen) + copy(buf, sparseMap.Bytes()) + + // Get an reader to read the sparse map + r := bytes.NewReader(buf) + + // Read the sparse map + sp, err := readGNUSparseMap1x0(r) + if err != nil { + t.Errorf("Unexpected error: %v", err) + } + if !reflect.DeepEqual(sp, expected) { + t.Errorf("Incorrect sparse map: got %v, wanted %v", sp, expected) + } +} + +func TestUninitializedRead(t *testing.T) { + test := gnuTarTest + f, err := os.Open(test.file) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + defer f.Close() + + tr := NewReader(f) + _, err = tr.Read([]byte{}) + if err == nil || err != io.EOF { + t.Errorf("Unexpected error: %v, wanted %v", err, io.EOF) + } + +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atim.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atim.go new file mode 100644 index 0000000000..cf9cc79c59 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atim.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux dragonfly openbsd solaris + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atim.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctim.Unix()) +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go new file mode 100644 index 0000000000..6f17dbe307 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go @@ -0,0 +1,20 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build darwin freebsd netbsd + +package tar + +import ( + "syscall" + "time" +) + +func statAtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Atimespec.Unix()) +} + +func statCtime(st *syscall.Stat_t) time.Time { + return time.Unix(st.Ctimespec.Unix()) +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_unix.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_unix.go new file mode 100644 index 0000000000..cb843db4cf --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/stat_unix.go @@ -0,0 +1,32 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux darwin dragonfly freebsd openbsd netbsd solaris + +package tar + +import ( + "os" + "syscall" +) + +func init() { + sysStat = statUnix +} + +func statUnix(fi os.FileInfo, h *Header) error { + sys, ok := fi.Sys().(*syscall.Stat_t) + if !ok { + return nil + } + h.Uid = int(sys.Uid) + h.Gid = int(sys.Gid) + // TODO(bradfitz): populate username & group. os/user + // doesn't cache LookupId lookups, and lacks group + // lookup functions. + h.AccessTime = statAtime(sys) + h.ChangeTime = statCtime(sys) + // TODO(bradfitz): major/minor device numbers? + return nil +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/tar_test.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/tar_test.go new file mode 100644 index 0000000000..ed333f3ea4 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/tar_test.go @@ -0,0 +1,284 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "io/ioutil" + "os" + "path" + "reflect" + "strings" + "testing" + "time" +) + +func TestFileInfoHeader(t *testing.T) { + fi, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "small.txt"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Mode, int64(fi.Mode().Perm())|c_ISREG; g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(5); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } + // FileInfoHeader should error when passing nil FileInfo + if _, err := FileInfoHeader(nil, ""); err == nil { + t.Fatalf("Expected error when passing nil to FileInfoHeader") + } +} + +func TestFileInfoHeaderDir(t *testing.T) { + fi, err := os.Stat("testdata") + if err != nil { + t.Fatal(err) + } + h, err := FileInfoHeader(fi, "") + if err != nil { + t.Fatalf("FileInfoHeader: %v", err) + } + if g, e := h.Name, "testdata/"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + // Ignoring c_ISGID for golang.org/issue/4867 + if g, e := h.Mode&^c_ISGID, int64(fi.Mode().Perm())|c_ISDIR; g != e { + t.Errorf("Mode = %#o; want %#o", g, e) + } + if g, e := h.Size, int64(0); g != e { + t.Errorf("Size = %v; want %v", g, e) + } + if g, e := h.ModTime, fi.ModTime(); !g.Equal(e) { + t.Errorf("ModTime = %v; want %v", g, e) + } +} + +func TestFileInfoHeaderSymlink(t *testing.T) { + h, err := FileInfoHeader(symlink{}, "some-target") + if err != nil { + t.Fatal(err) + } + if g, e := h.Name, "some-symlink"; g != e { + t.Errorf("Name = %q; want %q", g, e) + } + if g, e := h.Linkname, "some-target"; g != e { + t.Errorf("Linkname = %q; want %q", g, e) + } +} + +type symlink struct{} + +func (symlink) Name() string { return "some-symlink" } +func (symlink) Size() int64 { return 0 } +func (symlink) Mode() os.FileMode { return os.ModeSymlink } +func (symlink) ModTime() time.Time { return time.Time{} } +func (symlink) IsDir() bool { return false } +func (symlink) Sys() interface{} { return nil } + +func TestRoundTrip(t *testing.T) { + data := []byte("some file contents") + + var b bytes.Buffer + tw := NewWriter(&b) + hdr := &Header{ + Name: "file.txt", + Uid: 1 << 21, // too big for 8 octal digits + Size: int64(len(data)), + ModTime: time.Now(), + } + // tar only supports second precision. + hdr.ModTime = hdr.ModTime.Add(-time.Duration(hdr.ModTime.Nanosecond()) * time.Nanosecond) + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("tw.WriteHeader: %v", err) + } + if _, err := tw.Write(data); err != nil { + t.Fatalf("tw.Write: %v", err) + } + if err := tw.Close(); err != nil { + t.Fatalf("tw.Close: %v", err) + } + + // Read it back. + tr := NewReader(&b) + rHdr, err := tr.Next() + if err != nil { + t.Fatalf("tr.Next: %v", err) + } + if !reflect.DeepEqual(rHdr, hdr) { + t.Errorf("Header mismatch.\n got %+v\nwant %+v", rHdr, hdr) + } + rData, err := ioutil.ReadAll(tr) + if err != nil { + t.Fatalf("Read: %v", err) + } + if !bytes.Equal(rData, data) { + t.Errorf("Data mismatch.\n got %q\nwant %q", rData, data) + } +} + +type headerRoundTripTest struct { + h *Header + fm os.FileMode +} + +func TestHeaderRoundTrip(t *testing.T) { + golden := []headerRoundTripTest{ + // regular file. + { + h: &Header{ + Name: "test.txt", + Mode: 0644 | c_ISREG, + Size: 12, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeReg, + }, + fm: 0644, + }, + // hard link. + { + h: &Header{ + Name: "hard.txt", + Mode: 0644 | c_ISLNK, + Size: 0, + ModTime: time.Unix(1360600916, 0), + Typeflag: TypeLink, + }, + fm: 0644 | os.ModeSymlink, + }, + // symbolic link. + { + h: &Header{ + Name: "link.txt", + Mode: 0777 | c_ISLNK, + Size: 0, + ModTime: time.Unix(1360600852, 0), + Typeflag: TypeSymlink, + }, + fm: 0777 | os.ModeSymlink, + }, + // character device node. + { + h: &Header{ + Name: "dev/null", + Mode: 0666 | c_ISCHR, + Size: 0, + ModTime: time.Unix(1360578951, 0), + Typeflag: TypeChar, + }, + fm: 0666 | os.ModeDevice | os.ModeCharDevice, + }, + // block device node. + { + h: &Header{ + Name: "dev/sda", + Mode: 0660 | c_ISBLK, + Size: 0, + ModTime: time.Unix(1360578954, 0), + Typeflag: TypeBlock, + }, + fm: 0660 | os.ModeDevice, + }, + // directory. + { + h: &Header{ + Name: "dir/", + Mode: 0755 | c_ISDIR, + Size: 0, + ModTime: time.Unix(1360601116, 0), + Typeflag: TypeDir, + }, + fm: 0755 | os.ModeDir, + }, + // fifo node. + { + h: &Header{ + Name: "dev/initctl", + Mode: 0600 | c_ISFIFO, + Size: 0, + ModTime: time.Unix(1360578949, 0), + Typeflag: TypeFifo, + }, + fm: 0600 | os.ModeNamedPipe, + }, + // setuid. + { + h: &Header{ + Name: "bin/su", + Mode: 0755 | c_ISREG | c_ISUID, + Size: 23232, + ModTime: time.Unix(1355405093, 0), + Typeflag: TypeReg, + }, + fm: 0755 | os.ModeSetuid, + }, + // setguid. + { + h: &Header{ + Name: "group.txt", + Mode: 0750 | c_ISREG | c_ISGID, + Size: 0, + ModTime: time.Unix(1360602346, 0), + Typeflag: TypeReg, + }, + fm: 0750 | os.ModeSetgid, + }, + // sticky. + { + h: &Header{ + Name: "sticky.txt", + Mode: 0600 | c_ISREG | c_ISVTX, + Size: 7, + ModTime: time.Unix(1360602540, 0), + Typeflag: TypeReg, + }, + fm: 0600 | os.ModeSticky, + }, + } + + for i, g := range golden { + fi := g.h.FileInfo() + h2, err := FileInfoHeader(fi, "") + if err != nil { + t.Error(err) + continue + } + if strings.Contains(fi.Name(), "/") { + t.Errorf("FileInfo of %q contains slash: %q", g.h.Name, fi.Name()) + } + name := path.Base(g.h.Name) + if fi.IsDir() { + name += "/" + } + if got, want := h2.Name, name; got != want { + t.Errorf("i=%d: Name: got %v, want %v", i, got, want) + } + if got, want := h2.Size, g.h.Size; got != want { + t.Errorf("i=%d: Size: got %v, want %v", i, got, want) + } + if got, want := h2.Mode, g.h.Mode; got != want { + t.Errorf("i=%d: Mode: got %o, want %o", i, got, want) + } + if got, want := fi.Mode(), g.fm; got != want { + t.Errorf("i=%d: fi.Mode: got %o, want %o", i, got, want) + } + if got, want := h2.ModTime, g.h.ModTime; got != want { + t.Errorf("i=%d: ModTime: got %v, want %v", i, got, want) + } + if sysh, ok := fi.Sys().(*Header); !ok || sysh != g.h { + t.Errorf("i=%d: Sys didn't return original *Header", i) + } + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/gnu.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/gnu.tar new file mode 100644 index 0000000000..fc899dc8dc Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/gnu.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/nil-uid.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/nil-uid.tar new file mode 100644 index 0000000000..cc9cfaa33c Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/nil-uid.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/pax.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/pax.tar new file mode 100644 index 0000000000..9bc24b6587 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/pax.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small.txt b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small.txt new file mode 100644 index 0000000000..b249bfc518 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small.txt @@ -0,0 +1 @@ +Kilts \ No newline at end of file diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small2.txt b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small2.txt new file mode 100644 index 0000000000..394ee3ecd0 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/small2.txt @@ -0,0 +1 @@ +Google.com diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/sparse-formats.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/sparse-formats.tar new file mode 100644 index 0000000000..8bd4e74d50 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/sparse-formats.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/star.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/star.tar new file mode 100644 index 0000000000..59e2d4e604 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/star.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/ustar.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/ustar.tar new file mode 100644 index 0000000000..29679d9a30 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/ustar.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/v7.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/v7.tar new file mode 100644 index 0000000000..eb65fc9410 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/v7.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big-long.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big-long.tar new file mode 100644 index 0000000000..5960ee8247 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big-long.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big.tar new file mode 100644 index 0000000000..753e883ceb Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer-big.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer.tar new file mode 100644 index 0000000000..e6d816ad07 Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/writer.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/xattrs.tar b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/xattrs.tar new file mode 100644 index 0000000000..9701950edd Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/archive/tar/testdata/xattrs.tar differ diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go new file mode 100644 index 0000000000..dafb2cabf3 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer.go @@ -0,0 +1,396 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +// TODO(dsymonds): +// - catch more errors (no first header, etc.) + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path" + "strconv" + "strings" + "time" +) + +var ( + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errNameTooLong = errors.New("archive/tar: name too long") + errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") +) + +// A Writer provides sequential writing of a tar archive in POSIX.1 format. +// A tar archive consists of a sequence of files. +// Call WriteHeader to begin a new file, and then call Write to supply that file's data, +// writing at most hdr.Size bytes in total. +type Writer struct { + w io.Writer + err error + nb int64 // number of unwritten bytes for current file entry + pad int64 // amount of padding to write after current file entry + closed bool + usedBinary bool // whether the binary numeric field extension was used + preferPax bool // use pax header instead of binary numeric header + hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header + paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header +} + +// NewWriter creates a new Writer writing to w. +func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } + +// Flush finishes writing the current file (optional). +func (tw *Writer) Flush() error { + if tw.nb > 0 { + tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) + return tw.err + } + + n := tw.nb + tw.pad + for n > 0 && tw.err == nil { + nr := n + if nr > blockSize { + nr = blockSize + } + var nw int + nw, tw.err = tw.w.Write(zeroBlock[0:nr]) + n -= int64(nw) + } + tw.nb = 0 + tw.pad = 0 + return tw.err +} + +// Write s into b, terminating it with a NUL if there is room. +// If the value is too long for the field and allowPax is true add a paxheader record instead +func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) { + needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s) + if needsPaxHeader { + paxHeaders[paxKeyword] = s + return + } + if len(s) > len(b) { + if tw.err == nil { + tw.err = ErrFieldTooLong + } + return + } + ascii := toASCII(s) + copy(b, ascii) + if len(ascii) < len(b) { + b[len(ascii)] = 0 + } +} + +// Encode x as an octal ASCII string and write it into b with leading zeros. +func (tw *Writer) octal(b []byte, x int64) { + s := strconv.FormatInt(x, 8) + // leading zeros, but leave room for a NUL. + for len(s)+1 < len(b) { + s = "0" + s + } + tw.cString(b, s, false, paxNone, nil) +} + +// Write x into b, either as octal or as binary (GNUtar/star extension). +// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead +func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) { + // Try octal first. + s := strconv.FormatInt(x, 8) + if len(s) < len(b) { + tw.octal(b, x) + return + } + + // If it is too long for octal, and pax is preferred, use a pax header + if allowPax && tw.preferPax { + tw.octal(b, 0) + s := strconv.FormatInt(x, 10) + paxHeaders[paxKeyword] = s + return + } + + // Too big: use binary (big-endian). + tw.usedBinary = true + for i := len(b) - 1; x > 0 && i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // highest bit indicates binary format +} + +var ( + minTime = time.Unix(0, 0) + // There is room for 11 octal digits (33 bits) of mtime. + maxTime = minTime.Add((1<<33 - 1) * time.Second) +) + +// WriteHeader writes hdr and prepares to accept the file's contents. +// WriteHeader calls Flush if it is not the first header. +// Calling after a Close will return ErrWriteAfterClose. +func (tw *Writer) WriteHeader(hdr *Header) error { + return tw.writeHeader(hdr, true) +} + +// WriteHeader writes hdr and prepares to accept the file's contents. +// WriteHeader calls Flush if it is not the first header. +// Calling after a Close will return ErrWriteAfterClose. +// As this method is called internally by writePax header to allow it to +// suppress writing the pax header. +func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { + if tw.closed { + return ErrWriteAfterClose + } + if tw.err == nil { + tw.Flush() + } + if tw.err != nil { + return tw.err + } + + // a map to hold pax header records, if any are needed + paxHeaders := make(map[string]string) + + // TODO(shanemhansen): we might want to use PAX headers for + // subsecond time resolution, but for now let's just capture + // too long fields or non ascii characters + + var header []byte + + // We need to select which scratch buffer to use carefully, + // since this method is called recursively to write PAX headers. + // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. + // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is + // already being used by the non-recursive call, so we must use paxHdrBuff. + header = tw.hdrBuff[:] + if !allowPax { + header = tw.paxHdrBuff[:] + } + copy(header, zeroBlock) + s := slicer(header) + + // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax + pathHeaderBytes := s.next(fileNameSize) + + tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders) + + // Handle out of range ModTime carefully. + var modTime int64 + if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { + modTime = hdr.ModTime.Unix() + } + + tw.octal(s.next(8), hdr.Mode) // 100:108 + tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116 + tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124 + tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136 + tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity + s.next(8) // chksum (148:156) + s.next(1)[0] = hdr.Typeflag // 156:157 + + tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders) + + copy(s.next(8), []byte("ustar\x0000")) // 257:265 + tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297 + tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329 + tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337 + tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345 + + // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax + prefixHeaderBytes := s.next(155) + tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix + + // Use the GNU magic instead of POSIX magic if we used any GNU extensions. + if tw.usedBinary { + copy(header[257:265], []byte("ustar \x00")) + } + + _, paxPathUsed := paxHeaders[paxPath] + // try to use a ustar header when only the name is too long + if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { + suffix := hdr.Name + prefix := "" + if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) { + var err error + prefix, suffix, err = tw.splitUSTARLongName(hdr.Name) + if err == nil { + // ok we can use a ustar long name instead of pax, now correct the fields + + // remove the path field from the pax header. this will suppress the pax header + delete(paxHeaders, paxPath) + + // update the path fields + tw.cString(pathHeaderBytes, suffix, false, paxNone, nil) + tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil) + + // Use the ustar magic if we used ustar long names. + if len(prefix) > 0 && !tw.usedBinary { + copy(header[257:265], []byte("ustar\x00")) + } + } + } + } + + // The chksum field is terminated by a NUL and a space. + // This is different from the other octal fields. + chksum, _ := checksum(header) + tw.octal(header[148:155], chksum) + header[155] = ' ' + + if tw.err != nil { + // problem with header; probably integer too big for a field. + return tw.err + } + + if allowPax { + for k, v := range hdr.Xattrs { + paxHeaders[paxXattr+k] = v + } + } + + if len(paxHeaders) > 0 { + if !allowPax { + return errInvalidHeader + } + if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { + return err + } + } + tw.nb = int64(hdr.Size) + tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize + + _, tw.err = tw.w.Write(header) + return tw.err +} + +// writeUSTARLongName splits a USTAR long name hdr.Name. +// name must be < 256 characters. errNameTooLong is returned +// if hdr.Name can't be split. The splitting heuristic +// is compatible with gnu tar. +func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) { + length := len(name) + if length > fileNamePrefixSize+1 { + length = fileNamePrefixSize + 1 + } else if name[length-1] == '/' { + length-- + } + i := strings.LastIndex(name[:length], "/") + // nlen contains the resulting length in the name field. + // plen contains the resulting length in the prefix field. + nlen := len(name) - i - 1 + plen := i + if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { + err = errNameTooLong + return + } + prefix, suffix = name[:i], name[i+1:] + return +} + +// writePaxHeader writes an extended pax header to the +// archive. +func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { + // Prepare extended header + ext := new(Header) + ext.Typeflag = TypeXHeader + // Setting ModTime is required for reader parsing to + // succeed, and seems harmless enough. + ext.ModTime = hdr.ModTime + // The spec asks that we namespace our pseudo files + // with the current pid. + pid := os.Getpid() + dir, file := path.Split(hdr.Name) + fullName := path.Join(dir, + fmt.Sprintf("PaxHeaders.%d", pid), file) + + ascii := toASCII(fullName) + if len(ascii) > 100 { + ascii = ascii[:100] + } + ext.Name = ascii + // Construct the body + var buf bytes.Buffer + + for k, v := range paxHeaders { + fmt.Fprint(&buf, paxHeader(k+"="+v)) + } + + ext.Size = int64(len(buf.Bytes())) + if err := tw.writeHeader(ext, false); err != nil { + return err + } + if _, err := tw.Write(buf.Bytes()); err != nil { + return err + } + if err := tw.Flush(); err != nil { + return err + } + return nil +} + +// paxHeader formats a single pax record, prefixing it with the appropriate length +func paxHeader(msg string) string { + const padding = 2 // Extra padding for space and newline + size := len(msg) + padding + size += len(strconv.Itoa(size)) + record := fmt.Sprintf("%d %s\n", size, msg) + if len(record) != size { + // Final adjustment if adding size increased + // the number of digits in size + size = len(record) + record = fmt.Sprintf("%d %s\n", size, msg) + } + return record +} + +// Write writes to the current entry in the tar archive. +// Write returns the error ErrWriteTooLong if more than +// hdr.Size bytes are written after WriteHeader. +func (tw *Writer) Write(b []byte) (n int, err error) { + if tw.closed { + err = ErrWriteTooLong + return + } + overwrite := false + if int64(len(b)) > tw.nb { + b = b[0:tw.nb] + overwrite = true + } + n, err = tw.w.Write(b) + tw.nb -= int64(n) + if err == nil && overwrite { + err = ErrWriteTooLong + return + } + tw.err = err + return +} + +// Close closes the tar archive, flushing any unwritten +// data to the underlying writer. +func (tw *Writer) Close() error { + if tw.err != nil || tw.closed { + return tw.err + } + tw.Flush() + tw.closed = true + if tw.err != nil { + return tw.err + } + + // trailer: two zero blocks + for i := 0; i < 2; i++ { + _, tw.err = tw.w.Write(zeroBlock) + if tw.err != nil { + break + } + } + return tw.err +} diff --git a/vendor/src/github.com/vbatts/tar-split/archive/tar/writer_test.go b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer_test.go new file mode 100644 index 0000000000..5e42e322f9 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/archive/tar/writer_test.go @@ -0,0 +1,491 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "reflect" + "strings" + "testing" + "testing/iotest" + "time" +) + +type writerTestEntry struct { + header *Header + contents string +} + +type writerTest struct { + file string // filename of expected output + entries []*writerTestEntry +} + +var writerTests = []*writerTest{ + // The writer test file was produced with this command: + // tar (GNU tar) 1.26 + // ln -s small.txt link.txt + // tar -b 1 --format=ustar -c -f writer.tar small.txt small2.txt link.txt + { + file: "testdata/writer.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: "small.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 5, + ModTime: time.Unix(1246508266, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Kilts", + }, + { + header: &Header{ + Name: "small2.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 11, + ModTime: time.Unix(1245217492, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + contents: "Google.com\n", + }, + { + header: &Header{ + Name: "link.txt", + Mode: 0777, + Uid: 1000, + Gid: 1000, + Size: 0, + ModTime: time.Unix(1314603082, 0), + Typeflag: '2', + Linkname: "small.txt", + Uname: "strings", + Gname: "strings", + }, + // no contents + }, + }, + }, + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > /tmp/16gig.txt + // tar -b 1 -c -f- /tmp/16gig.txt | dd bs=512 count=8 > writer-big.tar + { + file: "testdata/writer-big.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: "tmp/16gig.txt", + Mode: 0640, + Uid: 73025, + Gid: 5000, + Size: 16 << 30, + ModTime: time.Unix(1254699560, 0), + Typeflag: '0', + Uname: "dsymonds", + Gname: "eng", + }, + // fake contents + contents: strings.Repeat("\x00", 4<<10), + }, + }, + }, + // The truncated test file was produced using these commands: + // dd if=/dev/zero bs=1048576 count=16384 > (longname/)*15 /16gig.txt + // tar -b 1 -c -f- (longname/)*15 /16gig.txt | dd bs=512 count=8 > writer-big-long.tar + { + file: "testdata/writer-big-long.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: strings.Repeat("longname/", 15) + "16gig.txt", + Mode: 0644, + Uid: 1000, + Gid: 1000, + Size: 16 << 30, + ModTime: time.Unix(1399583047, 0), + Typeflag: '0', + Uname: "guillaume", + Gname: "guillaume", + }, + // fake contents + contents: strings.Repeat("\x00", 4<<10), + }, + }, + }, + // This file was produced using gnu tar 1.17 + // gnutar -b 4 --format=ustar (longname/)*15 + file.txt + { + file: "testdata/ustar.tar", + entries: []*writerTestEntry{ + { + header: &Header{ + Name: strings.Repeat("longname/", 15) + "file.txt", + Mode: 0644, + Uid: 0765, + Gid: 024, + Size: 06, + ModTime: time.Unix(1360135598, 0), + Typeflag: '0', + Uname: "shane", + Gname: "staff", + }, + contents: "hello\n", + }, + }, + }, +} + +// Render byte array in a two-character hexadecimal string, spaced for easy visual inspection. +func bytestr(offset int, b []byte) string { + const rowLen = 32 + s := fmt.Sprintf("%04x ", offset) + for _, ch := range b { + switch { + case '0' <= ch && ch <= '9', 'A' <= ch && ch <= 'Z', 'a' <= ch && ch <= 'z': + s += fmt.Sprintf(" %c", ch) + default: + s += fmt.Sprintf(" %02x", ch) + } + } + return s +} + +// Render a pseudo-diff between two blocks of bytes. +func bytediff(a []byte, b []byte) string { + const rowLen = 32 + s := fmt.Sprintf("(%d bytes vs. %d bytes)\n", len(a), len(b)) + for offset := 0; len(a)+len(b) > 0; offset += rowLen { + na, nb := rowLen, rowLen + if na > len(a) { + na = len(a) + } + if nb > len(b) { + nb = len(b) + } + sa := bytestr(offset, a[0:na]) + sb := bytestr(offset, b[0:nb]) + if sa != sb { + s += fmt.Sprintf("-%v\n+%v\n", sa, sb) + } + a = a[na:] + b = b[nb:] + } + return s +} + +func TestWriter(t *testing.T) { +testLoop: + for i, test := range writerTests { + expected, err := ioutil.ReadFile(test.file) + if err != nil { + t.Errorf("test %d: Unexpected error: %v", i, err) + continue + } + + buf := new(bytes.Buffer) + tw := NewWriter(iotest.TruncateWriter(buf, 4<<10)) // only catch the first 4 KB + big := false + for j, entry := range test.entries { + big = big || entry.header.Size > 1<<10 + if err := tw.WriteHeader(entry.header); err != nil { + t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err) + continue testLoop + } + if _, err := io.WriteString(tw, entry.contents); err != nil { + t.Errorf("test %d, entry %d: Failed writing contents: %v", i, j, err) + continue testLoop + } + } + // Only interested in Close failures for the small tests. + if err := tw.Close(); err != nil && !big { + t.Errorf("test %d: Failed closing archive: %v", i, err) + continue testLoop + } + + actual := buf.Bytes() + if !bytes.Equal(expected, actual) { + t.Errorf("test %d: Incorrect result: (-=expected, +=actual)\n%v", + i, bytediff(expected, actual)) + } + if testing.Short() { // The second test is expensive. + break + } + } +} + +func TestPax(t *testing.T) { + // Create an archive with a large name + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + // Force a PAX long name to be written + longName := strings.Repeat("ab", 100) + contents := strings.Repeat(" ", int(hdr.Size)) + hdr.Name = longName + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long file name") + } +} + +func TestPaxSymlink(t *testing.T) { + // Create an archive with a large linkname + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeSymlink + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long linkname to be written + longLinkname := strings.Repeat("1234567890/1234567890", 10) + hdr.Linkname = longLinkname + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Linkname != longLinkname { + t.Fatal("Couldn't recover long link name") + } +} + +func TestPaxNonAscii(t *testing.T) { + // Create an archive with non ascii. These should trigger a pax header + // because pax headers have a defined utf-8 encoding. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + + // some sample data + chineseFilename := "文件名" + chineseGroupname := "組" + chineseUsername := "用戶名" + + hdr.Name = chineseFilename + hdr.Gname = chineseGroupname + hdr.Uname = chineseUsername + + contents := strings.Repeat(" ", int(hdr.Size)) + + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Simple test to make sure PAX extensions are in effect + if !bytes.Contains(buf.Bytes(), []byte("PaxHeaders.")) { + t.Fatal("Expected at least one PAX header to be written.") + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != chineseFilename { + t.Fatal("Couldn't recover unicode name") + } + if hdr.Gname != chineseGroupname { + t.Fatal("Couldn't recover unicode group") + } + if hdr.Uname != chineseUsername { + t.Fatal("Couldn't recover unicode user") + } +} + +func TestPaxXattrs(t *testing.T) { + xattrs := map[string]string{ + "user.key": "value", + } + + // Create an archive with an xattr + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + if err != nil { + t.Fatalf("os.Stat: %v", err) + } + contents := "Kilts" + hdr.Xattrs = xattrs + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err = writer.Write([]byte(contents)); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get the xattrs back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(hdr.Xattrs, xattrs) { + t.Fatalf("xattrs did not survive round trip: got %+v, want %+v", + hdr.Xattrs, xattrs) + } +} + +func TestPAXHeader(t *testing.T) { + medName := strings.Repeat("CD", 50) + longName := strings.Repeat("AB", 100) + paxTests := [][2]string{ + {paxPath + "=/etc/hosts", "19 path=/etc/hosts\n"}, + {"a=b", "6 a=b\n"}, // Single digit length + {"a=names", "11 a=names\n"}, // Test case involving carries + {paxPath + "=" + longName, fmt.Sprintf("210 path=%s\n", longName)}, + {paxPath + "=" + medName, fmt.Sprintf("110 path=%s\n", medName)}} + + for _, test := range paxTests { + key, expected := test[0], test[1] + if result := paxHeader(key); result != expected { + t.Fatalf("paxHeader: got %s, expected %s", result, expected) + } + } +} + +func TestUSTARLongName(t *testing.T) { + // Create an archive with a path that failed to split with USTAR extension in previous versions. + fileinfo, err := os.Stat("testdata/small.txt") + if err != nil { + t.Fatal(err) + } + hdr, err := FileInfoHeader(fileinfo, "") + hdr.Typeflag = TypeDir + if err != nil { + t.Fatalf("os.Stat:1 %v", err) + } + // Force a PAX long name to be written. The name was taken from a practical example + // that fails and replaced ever char through numbers to anonymize the sample. + longName := "/0000_0000000/00000-000000000/0000_0000000/00000-0000000000000/0000_0000000/00000-0000000-00000000/0000_0000000/00000000/0000_0000000/000/0000_0000000/00000000v00/0000_0000000/000000/0000_0000000/0000000/0000_0000000/00000y-00/0000/0000/00000000/0x000000/" + hdr.Name = longName + + hdr.Size = 0 + var buf bytes.Buffer + writer := NewWriter(&buf) + if err := writer.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if err := writer.Close(); err != nil { + t.Fatal(err) + } + // Test that we can get a long name back out of the archive. + reader := NewReader(&buf) + hdr, err = reader.Next() + if err != nil { + t.Fatal(err) + } + if hdr.Name != longName { + t.Fatal("Couldn't recover long name") + } +} + +func TestValidTypeflagWithPAXHeader(t *testing.T) { + var buffer bytes.Buffer + tw := NewWriter(&buffer) + + fileName := strings.Repeat("ab", 100) + + hdr := &Header{ + Name: fileName, + Size: 4, + Typeflag: 0, + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatalf("Failed to write header: %s", err) + } + if _, err := tw.Write([]byte("fooo")); err != nil { + t.Fatalf("Failed to write the file's data: %s", err) + } + tw.Close() + + tr := NewReader(&buffer) + + for { + header, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("Failed to read header: %s", err) + } + if header.Typeflag != 0 { + t.Fatalf("Typeflag should've been 0, found %d", header.Typeflag) + } + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/checksize.go b/vendor/src/github.com/vbatts/tar-split/checksize.go new file mode 100644 index 0000000000..a6d3c0827a --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/checksize.go @@ -0,0 +1,111 @@ +// +build ignore + +package main + +import ( + "archive/tar" + "compress/gzip" + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +var ( + flCleanup = flag.Bool("cleanup", true, "cleanup tempfiles") +) + +func main() { + flag.Parse() + + for _, arg := range flag.Args() { + fh, err := os.Open(arg) + if err != nil { + log.Fatal(err) + } + defer fh.Close() + fi, err := fh.Stat() + if err != nil { + log.Fatal(err) + } + fmt.Printf("inspecting %q (size %dk)\n", fh.Name(), fi.Size()/1024) + + packFh, err := ioutil.TempFile("", "packed.") + if err != nil { + log.Fatal(err) + } + defer packFh.Close() + if *flCleanup { + defer os.Remove(packFh.Name()) + } + + sp := storage.NewJSONPacker(packFh) + fp := storage.NewDiscardFilePutter() + dissam, err := asm.NewInputTarStream(fh, sp, fp) + if err != nil { + log.Fatal(err) + } + + var num int + tr := tar.NewReader(dissam) + for { + _, err = tr.Next() + if err != nil { + if err == io.EOF { + break + } + log.Fatal(err) + } + num++ + if _, err := io.Copy(ioutil.Discard, tr); err != nil { + log.Fatal(err) + } + } + fmt.Printf(" -- number of files: %d\n", num) + + if err := packFh.Sync(); err != nil { + log.Fatal(err) + } + + fi, err = packFh.Stat() + if err != nil { + log.Fatal(err) + } + fmt.Printf(" -- size of metadata uncompressed: %dk\n", fi.Size()/1024) + + gzPackFh, err := ioutil.TempFile("", "packed.gz.") + if err != nil { + log.Fatal(err) + } + defer gzPackFh.Close() + if *flCleanup { + defer os.Remove(gzPackFh.Name()) + } + + gzWrtr := gzip.NewWriter(gzPackFh) + + if _, err := packFh.Seek(0, 0); err != nil { + log.Fatal(err) + } + + if _, err := io.Copy(gzWrtr, packFh); err != nil { + log.Fatal(err) + } + gzWrtr.Close() + + if err := gzPackFh.Sync(); err != nil { + log.Fatal(err) + } + + fi, err = gzPackFh.Stat() + if err != nil { + log.Fatal(err) + } + fmt.Printf(" -- size of gzip compressed metadata: %dk\n", fi.Size()/1024) + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/README.md b/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/README.md new file mode 100644 index 0000000000..5451be0f98 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/README.md @@ -0,0 +1,25 @@ +## tar-split utility + + +## Usage + +### Disassembly + +```bash +$ sha256sum archive.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 archive.tar +$ mkdir ./x +$ tar-split d --output tar-data.json.gz ./archive.tar | tar -C ./x -x +time="2015-07-20T15:45:04-04:00" level=info msg="created tar-data.json.gz from ./archive.tar (read 204800 bytes)" +``` + +### Assembly + +```bash +$ tar-split a --output new.tar --input ./tar-data.json.gz --path ./x/ +INFO[0000] created new.tar from ./x/ and ./tar-data.json.gz (wrote 204800 bytes) +$ sha256sum new.tar +d734a748db93ec873392470510b8a1c88929abd8fae2540dc43d5b26f7537868 new.tar +``` + + diff --git a/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/main.go b/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/main.go new file mode 100644 index 0000000000..8c631b0367 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/cmd/tar-split/main.go @@ -0,0 +1,175 @@ +// go:generate git tag | tail -1 +package main + +import ( + "compress/gzip" + "io" + "os" + + "github.com/Sirupsen/logrus" + "github.com/codegangsta/cli" + "github.com/vbatts/tar-split/tar/asm" + "github.com/vbatts/tar-split/tar/storage" +) + +func main() { + app := cli.NewApp() + app.Name = "tar-split" + app.Usage = "tar assembly and disassembly utility" + app.Version = "0.9.2" + app.Author = "Vincent Batts" + app.Email = "vbatts@hashbangbash.com" + app.Action = cli.ShowAppHelp + app.Before = func(c *cli.Context) error { + logrus.SetOutput(os.Stderr) + if c.Bool("debug") { + logrus.SetLevel(logrus.DebugLevel) + } + return nil + } + app.Flags = []cli.Flag{ + cli.BoolFlag{ + Name: "debug, D", + Usage: "debug output", + // defaults to false + }, + } + app.Commands = []cli.Command{ + { + Name: "disasm", + Aliases: []string{"d"}, + Usage: "disassemble the input tar stream", + Action: CommandDisasm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "output", + Value: "tar-data.json.gz", + Usage: "output of disassembled tar stream", + }, + }, + }, + { + Name: "asm", + Aliases: []string{"a"}, + Usage: "assemble tar stream", + Action: CommandAsm, + Flags: []cli.Flag{ + cli.StringFlag{ + Name: "input", + Value: "tar-data.json.gz", + Usage: "input of disassembled tar stream", + }, + cli.StringFlag{ + Name: "output", + Value: "-", + Usage: "reassembled tar archive", + }, + cli.StringFlag{ + Name: "path", + Value: "", + Usage: "relative path of extracted tar", + }, + }, + }, + } + + if err := app.Run(os.Args); err != nil { + logrus.Fatal(err) + } +} + +func CommandDisasm(c *cli.Context) { + if len(c.Args()) != 1 { + logrus.Fatalf("please specify tar to be disabled ") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set") + } + + // Set up the tar input stream + var inputStream io.Reader + if c.Args()[0] == "-" { + inputStream = os.Stdin + } else { + fh, err := os.Open(c.Args()[0]) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + inputStream = fh + } + + // Set up the metadata storage + mf, err := os.OpenFile(c.String("output"), os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(0600)) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz := gzip.NewWriter(mf) + defer mfz.Close() + metaPacker := storage.NewJSONPacker(mfz) + + // we're passing nil here for the file putter, because the ApplyDiff will + // handle the extraction of the archive + its, err := asm.NewInputTarStream(inputStream, metaPacker, nil) + if err != nil { + logrus.Fatal(err) + } + i, err := io.Copy(os.Stdout, its) + if err != nil { + logrus.Fatal(err) + } + logrus.Infof("created %s from %s (read %d bytes)", c.String("output"), c.Args()[0], i) +} + +func CommandAsm(c *cli.Context) { + if len(c.Args()) > 0 { + logrus.Warnf("%d additional arguments passed are ignored", len(c.Args())) + } + if len(c.String("input")) == 0 { + logrus.Fatalf("--input filename must be set") + } + if len(c.String("output")) == 0 { + logrus.Fatalf("--output filename must be set ([FILENAME|-])") + } + if len(c.String("path")) == 0 { + logrus.Fatalf("--path must be set") + } + + var outputStream io.Writer + if c.String("output") == "-" { + outputStream = os.Stdout + } else { + fh, err := os.Create(c.String("output")) + if err != nil { + logrus.Fatal(err) + } + defer fh.Close() + outputStream = fh + } + + // Get the tar metadata reader + mf, err := os.Open(c.String("input")) + if err != nil { + logrus.Fatal(err) + } + defer mf.Close() + mfz, err := gzip.NewReader(mf) + if err != nil { + logrus.Fatal(err) + } + defer mfz.Close() + + metaUnpacker := storage.NewJSONUnpacker(mfz) + // XXX maybe get the absolute path here + fileGetter := storage.NewPathFileGetter(c.String("path")) + + ots := asm.NewOutputTarStream(fileGetter, metaUnpacker) + defer ots.Close() + i, err := io.Copy(outputStream, ots) + if err != nil { + logrus.Fatal(err) + } + + logrus.Infof("created %s from %s and %s (wrote %d bytes)", c.String("output"), c.String("path"), c.String("input"), i) +} diff --git a/vendor/src/github.com/vbatts/tar-split/concept/main.go b/vendor/src/github.com/vbatts/tar-split/concept/main.go new file mode 100644 index 0000000000..36bdb404a6 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/concept/main.go @@ -0,0 +1,91 @@ +// +build ignore + +package main + +import ( + "flag" + "fmt" + "io" + "io/ioutil" + "log" + "os" + + "github.com/vbatts/tar-split/archive/tar" +) + +func main() { + flag.Parse() + log.SetOutput(os.Stderr) + for _, arg := range flag.Args() { + func() { + // Open the tar archive + fh, err := os.Open(arg) + if err != nil { + log.Fatal(err, arg) + } + defer fh.Close() + + output, err := os.Create(fmt.Sprintf("%s.out", arg)) + if err != nil { + log.Fatal(err) + } + defer output.Close() + log.Printf("writing %q to %q", fh.Name(), output.Name()) + + fi, err := fh.Stat() + if err != nil { + log.Fatal(err, fh.Name()) + } + size := fi.Size() + var sum int64 + tr := tar.NewReader(fh) + tr.RawAccounting = true + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + log.Println(err) + } + // even when an EOF is reached, there is often 1024 null bytes on + // the end of an archive. Collect them too. + post := tr.RawBytes() + output.Write(post) + sum += int64(len(post)) + + fmt.Printf("EOF padding: %d\n", len(post)) + break + } + + pre := tr.RawBytes() + output.Write(pre) + sum += int64(len(pre)) + + var i int64 + if i, err = io.Copy(output, tr); err != nil { + log.Println(err) + break + } + sum += i + + fmt.Println(hdr.Name, "pre:", len(pre), "read:", i) + } + + // it is allowable, and not uncommon that there is further padding on the + // end of an archive, apart from the expected 1024 null bytes + remainder, err := ioutil.ReadAll(fh) + if err != nil && err != io.EOF { + log.Fatal(err, fh.Name()) + } + output.Write(remainder) + sum += int64(len(remainder)) + fmt.Printf("Remainder: %d\n", len(remainder)) + + if size != sum { + fmt.Printf("Size: %d; Sum: %d; Diff: %d\n", size, sum, size-sum) + fmt.Printf("Compare like `cmp -bl %s %s | less`\n", fh.Name(), output.Name()) + } else { + fmt.Printf("Size: %d; Sum: %d\n", size, sum) + } + }() + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/README.md b/vendor/src/github.com/vbatts/tar-split/tar/asm/README.md new file mode 100644 index 0000000000..2a3a5b56a6 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/asm/README.md @@ -0,0 +1,44 @@ +asm +=== + +This library for assembly and disassembly of tar archives, facilitated by +`github.com/vbatts/tar-split/tar/storage`. + + +Concerns +-------- + +For completely safe assembly/disassembly, there will need to be a Content +Addressable Storage (CAS) directory, that maps to a checksum in the +`storage.Entity` of `storage.FileType`. + +This is due to the fact that tar archives _can_ allow multiple records for the +same path, but the last one effectively wins. Even if the prior records had a +different payload. + +In this way, when assembling an archive from relative paths, if the archive has +multiple entries for the same path, then all payloads read in from a relative +path would be identical. + + +Thoughts +-------- + +Have a look-aside directory or storage. This way when a clobbering record is +encountered from the tar stream, then the payload of the prior/existing file is +stored to the CAS. This way the clobbering record's file payload can be +extracted, but we'll have preserved the payload needed to reassemble a precise +tar archive. + +clobbered/path/to/file.[0-N] + +*alternatively* + +We could just _not_ support tar streams that have clobbering file paths. +Appending records to the archive is not incredibly common, and doesn't happen +by default for most implementations. Not supporting them wouldn't be a +security concern either, as if it did occur, we would reassemble an archive +that doesn't validate signature/checksum, so it shouldn't be trusted anyway. + +Otherwise, this will allow us to defer support for appended files as a FUTURE FEATURE. + diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble.go b/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble.go new file mode 100644 index 0000000000..1bef97bc35 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble.go @@ -0,0 +1,68 @@ +package asm + +import ( + "bytes" + "fmt" + "hash/crc64" + "io" + + "github.com/vbatts/tar-split/tar/storage" +) + +// NewOutputTarStream returns an io.ReadCloser that is an assemble tar archive +// stream. +// +// It takes a storage.FileGetter, for mapping the file payloads that are to be read in, +// and a storage.Unpacker, which has access to the rawbytes and file order +// metadata. With the combination of these two items, a precise assembled Tar +// archive is possible. +func NewOutputTarStream(fg storage.FileGetter, up storage.Unpacker) io.ReadCloser { + // ... Since these are interfaces, this is possible, so let's not have a nil pointer + if fg == nil || up == nil { + return nil + } + pr, pw := io.Pipe() + go func() { + for { + entry, err := up.Next() + if err != nil { + pw.CloseWithError(err) + return + } + switch entry.Type { + case storage.SegmentType: + if _, err := pw.Write(entry.Payload); err != nil { + pw.CloseWithError(err) + return + } + case storage.FileType: + if entry.Size == 0 { + continue + } + fh, err := fg.Get(entry.Name) + if err != nil { + pw.CloseWithError(err) + return + } + c := crc64.New(storage.CRCTable) + tRdr := io.TeeReader(fh, c) + if _, err := io.Copy(pw, tRdr); err != nil { + fh.Close() + pw.CloseWithError(err) + return + } + if !bytes.Equal(c.Sum(nil), entry.Payload) { + // I would rather this be a comparable ErrInvalidChecksum or such, + // but since it's coming through the PipeReader, the context of + // _which_ file would be lost... + fh.Close() + pw.CloseWithError(fmt.Errorf("file integrity checksum failed for %q", entry.Name)) + return + } + fh.Close() + } + } + pw.Close() + }() + return pr +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble_test.go b/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble_test.go new file mode 100644 index 0000000000..7cf44dc161 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/asm/assemble_test.go @@ -0,0 +1,181 @@ +package asm + +import ( + "bytes" + "compress/gzip" + "crypto/sha1" + "fmt" + "hash/crc64" + "io" + "io/ioutil" + "os" + "testing" + + "github.com/vbatts/tar-split/tar/storage" +) + +var entries = []struct { + Entry storage.Entry + Body []byte +}{ + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./hurr.txt", + Payload: []byte{2, 116, 164, 177, 171, 236, 107, 78}, + Size: 20, + }, + Body: []byte("imma hurr til I derp"), + }, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./ermahgerd.txt", + Payload: []byte{126, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + Body: []byte("café con leche, por favor"), + }, +} +var entriesMangled = []struct { + Entry storage.Entry + Body []byte +}{ + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./hurr.txt", + Payload: []byte{3, 116, 164, 177, 171, 236, 107, 78}, + Size: 20, + }, + // switch + Body: []byte("imma derp til I hurr"), + }, + { + Entry: storage.Entry{ + Type: storage.FileType, + Name: "./ermahgerd.txt", + Payload: []byte{127, 72, 89, 239, 230, 252, 160, 187}, + Size: 26, + }, + // san not con + Body: []byte("café sans leche, por favor"), + }, +} + +func TestTarStreamMangledGetterPutter(t *testing.T) { + fgp := storage.NewBufferFileGetPutter() + + // first lets prep a GetPutter and Packer + for i := range entries { + if entries[i].Entry.Type == storage.FileType { + j, csum, err := fgp.Put(entries[i].Entry.Name, bytes.NewBuffer(entries[i].Body)) + if err != nil { + t.Error(err) + } + if j != entries[i].Entry.Size { + t.Errorf("size %q: expected %d; got %d", + entries[i].Entry.Name, + entries[i].Entry.Size, + j) + } + if !bytes.Equal(csum, entries[i].Entry.Payload) { + t.Errorf("checksum %q: expected %v; got %v", + entries[i].Entry.Name, + entries[i].Entry.Payload, + csum) + } + } + } + + for _, e := range entriesMangled { + if e.Entry.Type == storage.FileType { + rdr, err := fgp.Get(e.Entry.Name) + if err != nil { + t.Error(err) + } + c := crc64.New(storage.CRCTable) + i, err := io.Copy(c, rdr) + if err != nil { + t.Fatal(err) + } + rdr.Close() + + csum := c.Sum(nil) + if bytes.Equal(csum, e.Entry.Payload) { + t.Errorf("wrote %d bytes. checksum for %q should not have matched! %v", + i, + e.Entry.Name, + csum) + } + } + } +} + +func TestTarStream(t *testing.T) { + var ( + expectedSum = "1eb237ff69bca6e22789ecb05b45d35ca307adbd" + expectedSize int64 = 10240 + ) + + fh, err := os.Open("./testdata/t.tar.gz") + if err != nil { + t.Fatal(err) + } + defer fh.Close() + gzRdr, err := gzip.NewReader(fh) + if err != nil { + t.Fatal(err) + } + defer gzRdr.Close() + + // Setup where we'll store the metadata + w := bytes.NewBuffer([]byte{}) + sp := storage.NewJSONPacker(w) + fgp := storage.NewBufferFileGetPutter() + + // wrap the disassembly stream + tarStream, err := NewInputTarStream(gzRdr, sp, fgp) + if err != nil { + t.Fatal(err) + } + + // get a sum of the stream after it has passed through to ensure it's the same. + h0 := sha1.New() + tRdr0 := io.TeeReader(tarStream, h0) + + // read it all to the bit bucket + i, err := io.Copy(ioutil.Discard, tRdr0) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h0.Sum(nil)) != expectedSum { + t.Fatalf("checksum of tar: expected %s; got %x", expectedSum, h0.Sum(nil)) + } + + t.Logf("%s", w.String()) // if we fail, then show the packed info + + // If we've made it this far, then we'll turn it around and create a tar + // stream from the packed metadata and buffered file contents. + r := bytes.NewBuffer(w.Bytes()) + sup := storage.NewJSONUnpacker(r) + // and reuse the fgp that we Put the payloads to. + + rc := NewOutputTarStream(fgp, sup) + h1 := sha1.New() + i, err = io.Copy(h1, rc) + if err != nil { + t.Fatal(err) + } + + if i != expectedSize { + t.Errorf("size of output tar: expected %d; got %d", expectedSize, i) + } + if fmt.Sprintf("%x", h1.Sum(nil)) != expectedSum { + t.Fatalf("checksum of output tar: expected %s; got %x", expectedSum, h1.Sum(nil)) + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/disassemble.go b/vendor/src/github.com/vbatts/tar-split/tar/asm/disassemble.go new file mode 100644 index 0000000000..785e1942be --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/asm/disassemble.go @@ -0,0 +1,133 @@ +package asm + +import ( + "io" + "io/ioutil" + + "github.com/vbatts/tar-split/archive/tar" + "github.com/vbatts/tar-split/tar/storage" +) + +// NewInputTarStream wraps the Reader stream of a tar archive and provides a +// Reader stream of the same. +// +// In the middle it will pack the segments and file metadata to storage.Packer +// `p`. +// +// The the storage.FilePutter is where payload of files in the stream are +// stashed. If this stashing is not needed, you can provide a nil +// storage.FilePutter. Since the checksumming is still needed, then a default +// of NewDiscardFilePutter will be used internally +func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) { + // What to do here... folks will want their own access to the Reader that is + // their tar archive stream, but we'll need that same stream to use our + // forked 'archive/tar'. + // Perhaps do an io.TeeReader that hand back an io.Reader for them to read + // from, and we'll mitm the stream to store metadata. + // We'll need a storage.FilePutter too ... + + // Another concern, whether to do any storage.FilePutter operations, such that we + // don't extract any amount of the archive. But then again, we're not making + // files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter. + // Perhaps we have a DiscardFilePutter that is a bit bucket. + + // we'll return the pipe reader, since TeeReader does not buffer and will + // only read what the outputRdr Read's. Since Tar archive's have padding on + // the end, we want to be the one reading the padding, even if the user's + // `archive/tar` doesn't care. + pR, pW := io.Pipe() + outputRdr := io.TeeReader(r, pW) + + // we need a putter that will generate the crc64 sums of file payloads + if fp == nil { + fp = storage.NewDiscardFilePutter() + } + + go func() { + tr := tar.NewReader(outputRdr) + tr.RawAccounting = true + for { + hdr, err := tr.Next() + if err != nil { + if err != io.EOF { + pW.CloseWithError(err) + return + } + // even when an EOF is reached, there is often 1024 null bytes on + // the end of an archive. Collect them too. + _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: tr.RawBytes(), + }) + if err != nil { + pW.CloseWithError(err) + return + } + break // not return. We need the end of the reader. + } + if hdr == nil { + break // not return. We need the end of the reader. + } + + if _, err := p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: tr.RawBytes(), + }); err != nil { + pW.CloseWithError(err) + return + } + + var csum []byte + if hdr.Size > 0 { + var err error + _, csum, err = fp.Put(hdr.Name, tr) + if err != nil { + pW.CloseWithError(err) + return + } + } + + // File entries added, regardless of size + _, err = p.AddEntry(storage.Entry{ + Type: storage.FileType, + Name: hdr.Name, + Size: hdr.Size, + Payload: csum, + }) + if err != nil { + pW.CloseWithError(err) + return + } + + if b := tr.RawBytes(); len(b) > 0 { + _, err = p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: b, + }) + if err != nil { + pW.CloseWithError(err) + return + } + } + } + + // it is allowable, and not uncommon that there is further padding on the + // end of an archive, apart from the expected 1024 null bytes. + remainder, err := ioutil.ReadAll(outputRdr) + if err != nil && err != io.EOF { + pW.CloseWithError(err) + return + } + _, err = p.AddEntry(storage.Entry{ + Type: storage.SegmentType, + Payload: remainder, + }) + if err != nil { + pW.CloseWithError(err) + return + } + pW.Close() + }() + + return pR, nil +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/doc.go b/vendor/src/github.com/vbatts/tar-split/tar/asm/doc.go new file mode 100644 index 0000000000..4367b90220 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/asm/doc.go @@ -0,0 +1,9 @@ +/* +Package asm provides the API for streaming assembly and disassembly of tar +archives. + +Using the `github.com/vbatts/tar-split/tar/storage` for Packing/Unpacking the +metadata for a stream, as well as an implementation of Getting/Putting the file +entries' payload. +*/ +package asm diff --git a/vendor/src/github.com/vbatts/tar-split/tar/asm/testdata/t.tar.gz b/vendor/src/github.com/vbatts/tar-split/tar/asm/testdata/t.tar.gz new file mode 100644 index 0000000000..d33bf9637b Binary files /dev/null and b/vendor/src/github.com/vbatts/tar-split/tar/asm/testdata/t.tar.gz differ diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/doc.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/doc.go new file mode 100644 index 0000000000..57b61bcf16 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/doc.go @@ -0,0 +1,12 @@ +/* +Package storage is for metadata of a tar archive. + +Packing and unpacking the Entries of the stream. The types of streams are +either segments of raw bytes (for the raw headers and various padding) and for +an entry marking a file payload. + +The raw bytes are stored precisely in the packed (marshalled) Entry. Where as +the file payload marker include the name of the file, size, and crc64 checksum +(for basic file integrity). +*/ +package storage diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/entry.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/entry.go new file mode 100644 index 0000000000..961af49005 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/entry.go @@ -0,0 +1,39 @@ +package storage + +// Entries is for sorting by Position +type Entries []Entry + +func (e Entries) Len() int { return len(e) } +func (e Entries) Swap(i, j int) { e[i], e[j] = e[j], e[i] } +func (e Entries) Less(i, j int) bool { return e[i].Position < e[j].Position } + +// Type of Entry +type Type int + +const ( + // FileType represents a file payload from the tar stream. + // + // This will be used to map to relative paths on disk. Only Size > 0 will get + // read into a resulting output stream (due to hardlinks). + FileType Type = 1 + iota + // SegmentType represents a raw bytes segment from the archive stream. These raw + // byte segments consist of the raw headers and various padding. + // + // It's payload is to be marshalled base64 encoded. + SegmentType +) + +// Entry is a the structure for packing and unpacking the information read from +// the Tar archive. +// +// FileType Payload checksum is using `hash/crc64` for basic file integrity, +// _not_ for cryptography. +// From http://www.backplane.com/matt/crc64.html, CRC32 has almost 40,000 +// collisions in a sample of 18.2 million, CRC64 had none. +type Entry struct { + Type Type `json:"type"` + Name string `json:"name",omitempty` + Size int64 `json:"size",omitempty` + Payload []byte `json:"payload"` // SegmentType store payload here; FileType store crc64 checksum here; + Position int `json:"position"` +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/entry_test.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/entry_test.go new file mode 100644 index 0000000000..c797bca14e --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/entry_test.go @@ -0,0 +1,66 @@ +package storage + +import ( + "encoding/json" + "sort" + "testing" +) + +func TestEntries(t *testing.T) { + e := Entries{ + Entry{ + Type: SegmentType, + Payload: []byte("y'all"), + Position: 1, + }, + Entry{ + Type: SegmentType, + Payload: []byte("doin"), + Position: 3, + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + Position: 2, + }, + Entry{ + Type: SegmentType, + Payload: []byte("how"), + Position: 0, + }, + } + sort.Sort(e) + if e[0].Position != 0 { + t.Errorf("expected Position 0, but got %d", e[0].Position) + } +} + +func TestFile(t *testing.T) { + f := Entry{ + Type: FileType, + Name: "./hello.txt", + Size: 100, + Position: 2, + } + + buf, err := json.Marshal(f) + if err != nil { + t.Fatal(err) + } + + f1 := Entry{} + if err = json.Unmarshal(buf, &f1); err != nil { + t.Fatal(err) + } + + if f.Name != f1.Name { + t.Errorf("expected Name %q, got %q", f.Name, f1.Name) + } + if f.Size != f1.Size { + t.Errorf("expected Size %q, got %q", f.Size, f1.Size) + } + if f.Position != f1.Position { + t.Errorf("expected Position %q, got %q", f.Position, f1.Position) + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/getter.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/getter.go new file mode 100644 index 0000000000..c44b15ebef --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/getter.go @@ -0,0 +1,106 @@ +package storage + +import ( + "bytes" + "errors" + "hash/crc64" + "io" + "io/ioutil" + "os" + "path/filepath" +) + +// FileGetter is the interface for getting a stream of a file payload, address +// by name/filename. Presumably, the names will be scoped to relative file +// paths. +type FileGetter interface { + // Get returns a stream for the provided file path + Get(filename string) (output io.ReadCloser, err error) +} + +// FilePutter is the interface for storing a stream of a file payload, +// addressed by name/filename. +type FilePutter interface { + // Put returns the size of the stream received, and the crc64 checksum for + // the provided stream + Put(filename string, input io.Reader) (size int64, checksum []byte, err error) +} + +// FileGetPutter is the interface that groups both Getting and Putting file +// payloads. +type FileGetPutter interface { + FileGetter + FilePutter +} + +// NewPathFileGetter returns a FileGetter that is for files relative to path +// relpath. +func NewPathFileGetter(relpath string) FileGetter { + return &pathFileGetter{root: relpath} +} + +type pathFileGetter struct { + root string +} + +func (pfg pathFileGetter) Get(filename string) (io.ReadCloser, error) { + return os.Open(filepath.Join(pfg.root, filename)) +} + +type bufferFileGetPutter struct { + files map[string][]byte +} + +func (bfgp bufferFileGetPutter) Get(name string) (io.ReadCloser, error) { + if _, ok := bfgp.files[name]; !ok { + return nil, errors.New("no such file") + } + b := bytes.NewBuffer(bfgp.files[name]) + return &readCloserWrapper{b}, nil +} + +func (bfgp *bufferFileGetPutter) Put(name string, r io.Reader) (int64, []byte, error) { + c := crc64.New(CRCTable) + tRdr := io.TeeReader(r, c) + b := bytes.NewBuffer([]byte{}) + i, err := io.Copy(b, tRdr) + if err != nil { + return 0, nil, err + } + bfgp.files[name] = b.Bytes() + return i, c.Sum(nil), nil +} + +type readCloserWrapper struct { + io.Reader +} + +func (w *readCloserWrapper) Close() error { return nil } + +// NewBufferFileGetPutter is simple in memory FileGetPutter +// +// Implication is this is memory intensive... +// Probably best for testing or light weight cases. +func NewBufferFileGetPutter() FileGetPutter { + return &bufferFileGetPutter{ + files: map[string][]byte{}, + } +} + +// NewDiscardFilePutter is a bit bucket FilePutter +func NewDiscardFilePutter() FilePutter { + return &bitBucketFilePutter{} +} + +type bitBucketFilePutter struct { +} + +func (bbfp *bitBucketFilePutter) Put(name string, r io.Reader) (int64, []byte, error) { + c := crc64.New(CRCTable) + tRdr := io.TeeReader(r, c) + i, err := io.Copy(ioutil.Discard, tRdr) + return i, c.Sum(nil), err +} + +// CRCTable is the default table used for crc64 sum calculations +var CRCTable = crc64.MakeTable(crc64.ISO) diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/getter_test.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/getter_test.go new file mode 100644 index 0000000000..5a6fcc79b5 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/getter_test.go @@ -0,0 +1,62 @@ +package storage + +import ( + "bytes" + "io/ioutil" + "testing" +) + +func TestGetter(t *testing.T) { + fgp := NewBufferFileGetPutter() + files := map[string]map[string][]byte{ + "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, + "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, + } + for n, b := range files { + for body, sum := range b { + _, csum, err := fgp.Put(n, bytes.NewBufferString(body)) + if err != nil { + t.Error(err) + } + if !bytes.Equal(csum, sum) { + t.Errorf("checksum: expected 0x%x; got 0x%x", sum, csum) + } + } + } + for n, b := range files { + for body := range b { + r, err := fgp.Get(n) + if err != nil { + t.Error(err) + } + buf, err := ioutil.ReadAll(r) + if err != nil { + t.Error(err) + } + if body != string(buf) { + t.Errorf("expected %q, got %q", body, string(buf)) + } + } + } +} +func TestPutter(t *testing.T) { + fp := NewDiscardFilePutter() + // map[filename]map[body]crc64sum + files := map[string]map[string][]byte{ + "file1.txt": {"foo": []byte{60, 60, 48, 48, 0, 0, 0, 0}}, + "file2.txt": {"bar": []byte{45, 196, 22, 240, 0, 0, 0, 0}}, + "file3.txt": {"baz": []byte{32, 68, 22, 240, 0, 0, 0, 0}}, + "file4.txt": {"bif": []byte{48, 9, 150, 240, 0, 0, 0, 0}}, + } + for n, b := range files { + for body, sum := range b { + _, csum, err := fp.Put(n, bytes.NewBufferString(body)) + if err != nil { + t.Error(err) + } + if !bytes.Equal(csum, sum) { + t.Errorf("checksum on %q: expected %v; got %v", n, sum, csum) + } + } + } +} diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/packer.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/packer.go new file mode 100644 index 0000000000..c0070a6c21 --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/packer.go @@ -0,0 +1,140 @@ +package storage + +import ( + "bufio" + "encoding/json" + "errors" + "io" + "path/filepath" +) + +// ErrDuplicatePath is occured when a tar archive has more than one entry for +// the same file path +var ErrDuplicatePath = errors.New("duplicates of file paths not supported") + +// Packer describes the methods to pack Entries to a storage destination +type Packer interface { + // AddEntry packs the Entry and returns its position + AddEntry(e Entry) (int, error) +} + +// Unpacker describes the methods to read Entries from a source +type Unpacker interface { + // Next returns the next Entry being unpacked, or error, until io.EOF + Next() (*Entry, error) +} + +/* TODO(vbatts) figure out a good model for this +type PackUnpacker interface { + Packer + Unpacker +} +*/ + +type jsonUnpacker struct { + r io.Reader + b *bufio.Reader + isEOF bool + seen seenNames +} + +func (jup *jsonUnpacker) Next() (*Entry, error) { + var e Entry + if jup.isEOF { + // since ReadBytes() will return read bytes AND an EOF, we handle it this + // round-a-bout way so we can Unmarshal the tail with relevant errors, but + // still get an io.EOF when the stream is ended. + return nil, io.EOF + } + line, err := jup.b.ReadBytes('\n') + if err != nil && err != io.EOF { + return nil, err + } else if err == io.EOF { + jup.isEOF = true + } + + err = json.Unmarshal(line, &e) + if err != nil && jup.isEOF { + // if the remainder actually _wasn't_ a remaining json structure, then just EOF + return nil, io.EOF + } + + // check for dup name + if e.Type == FileType { + cName := filepath.Clean(e.Name) + if _, ok := jup.seen[cName]; ok { + return nil, ErrDuplicatePath + } + jup.seen[cName] = emptyByte + } + + return &e, err +} + +// NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and +// FileType) as a json document. +// +// Each Entry read are expected to be delimited by new line. +func NewJSONUnpacker(r io.Reader) Unpacker { + return &jsonUnpacker{ + r: r, + b: bufio.NewReader(r), + seen: seenNames{}, + } +} + +type jsonPacker struct { + w io.Writer + e *json.Encoder + pos int + seen seenNames +} + +type seenNames map[string]byte + +// used in the seenNames map. byte is a uint8, and we'll re-use the same one +// for minimalism. +const emptyByte byte = 0 + +func (jp *jsonPacker) AddEntry(e Entry) (int, error) { + // check early for dup name + if e.Type == FileType { + cName := filepath.Clean(e.Name) + if _, ok := jp.seen[cName]; ok { + return -1, ErrDuplicatePath + } + jp.seen[cName] = emptyByte + } + + e.Position = jp.pos + err := jp.e.Encode(e) + if err != nil { + return -1, err + } + + // made it this far, increment now + jp.pos++ + return e.Position, nil +} + +// NewJSONPacker provides an Packer that writes each Entry (SegmentType and +// FileType) as a json document. +// +// The Entries are delimited by new line. +func NewJSONPacker(w io.Writer) Packer { + return &jsonPacker{ + w: w, + e: json.NewEncoder(w), + seen: seenNames{}, + } +} + +/* +TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgapck +(https://github.com/ugorji/go) + + +Even though, since our jsonUnpacker and jsonPacker just take +io.Reader/io.Writer, then we can get away with passing them a +gzip.Reader/gzip.Writer +*/ diff --git a/vendor/src/github.com/vbatts/tar-split/tar/storage/packer_test.go b/vendor/src/github.com/vbatts/tar-split/tar/storage/packer_test.go new file mode 100644 index 0000000000..1c6101f76c --- /dev/null +++ b/vendor/src/github.com/vbatts/tar-split/tar/storage/packer_test.go @@ -0,0 +1,163 @@ +package storage + +import ( + "bytes" + "compress/gzip" + "io" + "testing" +) + +func TestDuplicateFail(t *testing.T) { + e := []Entry{ + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("abcde"), + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + }, + Entry{ + Type: FileType, + Name: "hurr.txt", // slightly different path, same file though + Payload: []byte("deadbeef"), + }, + } + buf := []byte{} + b := bytes.NewBuffer(buf) + + jp := NewJSONPacker(b) + if _, err := jp.AddEntry(e[0]); err != nil { + t.Error(err) + } + if _, err := jp.AddEntry(e[1]); err != ErrDuplicatePath { + t.Errorf("expected failure on duplicate path") + } + if _, err := jp.AddEntry(e[2]); err != ErrDuplicatePath { + t.Errorf("expected failure on duplicate path") + } +} + +func TestJSONPackerUnpacker(t *testing.T) { + e := []Entry{ + Entry{ + Type: SegmentType, + Payload: []byte("how"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("y'all"), + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("doin"), + }, + } + + buf := []byte{} + b := bytes.NewBuffer(buf) + + func() { + jp := NewJSONPacker(b) + for i := range e { + if _, err := jp.AddEntry(e[i]); err != nil { + t.Error(err) + } + } + }() + + // >> packer_test.go:43: uncompressed: 266 + //t.Errorf("uncompressed: %d", len(b.Bytes())) + + b = bytes.NewBuffer(b.Bytes()) + entries := Entries{} + func() { + jup := NewJSONUnpacker(b) + for { + entry, err := jup.Next() + if err != nil { + if err == io.EOF { + break + } + t.Error(err) + } + entries = append(entries, *entry) + t.Logf("got %#v", entry) + } + }() + if len(entries) != len(e) { + t.Errorf("expected %d entries, got %d", len(e), len(entries)) + } +} + +// you can use a compress Reader/Writer and make nice savings. +// +// For these two tests that are using the same set, it the difference of 266 +// bytes uncompressed vs 138 bytes compressed. +func TestGzip(t *testing.T) { + e := []Entry{ + Entry{ + Type: SegmentType, + Payload: []byte("how"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("y'all"), + }, + Entry{ + Type: FileType, + Name: "./hurr.txt", + Payload: []byte("deadbeef"), + }, + Entry{ + Type: SegmentType, + Payload: []byte("doin"), + }, + } + + buf := []byte{} + b := bytes.NewBuffer(buf) + gzW := gzip.NewWriter(b) + jp := NewJSONPacker(gzW) + for i := range e { + if _, err := jp.AddEntry(e[i]); err != nil { + t.Error(err) + } + } + gzW.Close() + + // >> packer_test.go:99: compressed: 138 + //t.Errorf("compressed: %d", len(b.Bytes())) + + b = bytes.NewBuffer(b.Bytes()) + gzR, err := gzip.NewReader(b) + if err != nil { + t.Fatal(err) + } + entries := Entries{} + func() { + jup := NewJSONUnpacker(gzR) + for { + entry, err := jup.Next() + if err != nil { + if err == io.EOF { + break + } + t.Error(err) + } + entries = append(entries, *entry) + t.Logf("got %#v", entry) + } + }() + if len(entries) != len(e) { + t.Errorf("expected %d entries, got %d", len(e), len(entries)) + } + +}