|
@@ -0,0 +1,1263 @@
|
|
|
+package compactext4
|
|
|
+
|
|
|
+import (
|
|
|
+ "bufio"
|
|
|
+ "bytes"
|
|
|
+ "encoding/binary"
|
|
|
+ "errors"
|
|
|
+ "fmt"
|
|
|
+ "io"
|
|
|
+ "path"
|
|
|
+ "sort"
|
|
|
+ "strings"
|
|
|
+ "time"
|
|
|
+
|
|
|
+ "github.com/Microsoft/hcsshim/ext4/internal/format"
|
|
|
+)
|
|
|
+
|
|
|
+// Writer writes a compact ext4 file system.
|
|
|
+type Writer struct {
|
|
|
+ f io.ReadWriteSeeker
|
|
|
+ bw *bufio.Writer
|
|
|
+ inodes []*inode
|
|
|
+ curName string
|
|
|
+ curInode *inode
|
|
|
+ pos int64
|
|
|
+ dataWritten, dataMax int64
|
|
|
+ err error
|
|
|
+ initialized bool
|
|
|
+ supportInlineData bool
|
|
|
+ maxDiskSize int64
|
|
|
+ gdBlocks uint32
|
|
|
+}
|
|
|
+
|
|
|
+// Mode flags for Linux files.
|
|
|
+const (
|
|
|
+ S_IXOTH = format.S_IXOTH
|
|
|
+ S_IWOTH = format.S_IWOTH
|
|
|
+ S_IROTH = format.S_IROTH
|
|
|
+ S_IXGRP = format.S_IXGRP
|
|
|
+ S_IWGRP = format.S_IWGRP
|
|
|
+ S_IRGRP = format.S_IRGRP
|
|
|
+ S_IXUSR = format.S_IXUSR
|
|
|
+ S_IWUSR = format.S_IWUSR
|
|
|
+ S_IRUSR = format.S_IRUSR
|
|
|
+ S_ISVTX = format.S_ISVTX
|
|
|
+ S_ISGID = format.S_ISGID
|
|
|
+ S_ISUID = format.S_ISUID
|
|
|
+ S_IFIFO = format.S_IFIFO
|
|
|
+ S_IFCHR = format.S_IFCHR
|
|
|
+ S_IFDIR = format.S_IFDIR
|
|
|
+ S_IFBLK = format.S_IFBLK
|
|
|
+ S_IFREG = format.S_IFREG
|
|
|
+ S_IFLNK = format.S_IFLNK
|
|
|
+ S_IFSOCK = format.S_IFSOCK
|
|
|
+
|
|
|
+ TypeMask = format.TypeMask
|
|
|
+)
|
|
|
+
|
|
|
+type inode struct {
|
|
|
+ Size int64
|
|
|
+ Atime, Ctime, Mtime, Crtime uint64
|
|
|
+ Number format.InodeNumber
|
|
|
+ Mode uint16
|
|
|
+ Uid, Gid uint32
|
|
|
+ LinkCount uint32
|
|
|
+ XattrBlock uint32
|
|
|
+ BlockCount uint32
|
|
|
+ Devmajor, Devminor uint32
|
|
|
+ Flags format.InodeFlag
|
|
|
+ Data []byte
|
|
|
+ XattrInline []byte
|
|
|
+ Children directory
|
|
|
+}
|
|
|
+
|
|
|
+func (node *inode) FileType() uint16 {
|
|
|
+ return node.Mode & format.TypeMask
|
|
|
+}
|
|
|
+
|
|
|
+func (node *inode) IsDir() bool {
|
|
|
+ return node.FileType() == S_IFDIR
|
|
|
+}
|
|
|
+
|
|
|
// A File represents a file to be added to an ext4 file system.
type File struct {
	// Linkname is the target path; it is only used for symbolic links.
	Linkname string
	// Size is the number of content bytes that will be written; it is only
	// used for regular files.
	Size int64
	// Mode holds the file type and mode bits. A mode with no type bits is
	// treated as a regular file.
	Mode uint16
	Uid  uint32
	Gid  uint32
	// Timestamps. A zero time.Time is stored as 0.
	Atime  time.Time
	Ctime  time.Time
	Mtime  time.Time
	Crtime time.Time
	// Device numbers, used for block and character devices.
	Devmajor uint32
	Devminor uint32
	// Xattrs maps full extended attribute names to their values.
	Xattrs map[string][]byte
}
|
|
|
+
|
|
|
// Layout parameters of the generated file system.
const (
	// inodeFirst is the first non-reserved inode number; lost+found uses it.
	inodeFirst        = 11
	inodeLostAndFound = inodeFirst

	blockSize               = 4096
	blocksPerGroup          = 8 * blockSize // one bit per block in a one-block bitmap
	inodeSize               = 256
	maxInodesPerGroup       = 8 * blockSize // Limited by the inode bitmap
	inodesPerGroupIncrement = blockSize / inodeSize

	defaultMaxDiskSize = 16 << 30 // 16GB
	maxMaxDiskSize     = 16 << 40 // 16TB

	groupDescriptorSize      = 32 // Use the small group descriptor
	groupsPerDescriptorBlock = blockSize / groupDescriptorSize

	maxFileSize        = 128 << 30 // 128GB file size maximum for now
	smallSymlinkSize   = 59        // max symlink size that goes directly in the inode
	maxBlocksPerExtent = 0x8000    // maximum number of blocks in an extent
	inodeDataSize      = 60
	inodeUsedSize      = 152 // fields through CrtimeExtra
	inodeExtraSize     = inodeSize - inodeUsedSize

	xattrInodeOverhead      = 4 + 4                       // magic number + empty next entry value
	xattrBlockOverhead      = 32 + 4                      // header + empty next entry value
	inlineDataXattrOverhead = xattrInodeOverhead + 16 + 4 // entry + "data"

	// inlineDataSize is the largest file that fits in the inode: the block
	// area plus whatever the extra area can hold as a "system.data" xattr.
	inlineDataSize = inodeDataSize + inodeExtraSize - inlineDataXattrOverhead
)
|
|
|
+
|
|
|
// exceededMaxSizeError is reported when a write would grow the image past the
// configured maximum disk size.
type exceededMaxSizeError struct {
	// Size is the configured maximum, in bytes.
	Size int64
}

// Error implements the error interface.
func (err exceededMaxSizeError) Error() string {
	const layout = "disk exceeded maximum size of %d bytes"
	return fmt.Sprintf(layout, err.Size)
}
|
|
|
+
|
|
|
+var directoryEntrySize = binary.Size(format.DirectoryEntry{})
|
|
|
+var extraIsize = uint16(inodeUsedSize - 128)
|
|
|
+
|
|
|
+type directory map[string]*inode
|
|
|
+
|
|
|
// splitFirst splits p at its first '/' and returns the text before and after
// it. If p contains no '/', it returns p and the empty string.
func splitFirst(p string) (string, string) {
	slash := strings.IndexByte(p, '/')
	if slash < 0 {
		return p, ""
	}
	head, tail := p[:slash], p[slash+1:]
	return head, tail
}
|
|
|
+
|
|
|
+func (w *Writer) findPath(root *inode, p string) *inode {
|
|
|
+ inode := root
|
|
|
+ for inode != nil && len(p) != 0 {
|
|
|
+ name, rest := splitFirst(p)
|
|
|
+ p = rest
|
|
|
+ inode = inode.Children[name]
|
|
|
+ }
|
|
|
+ return inode
|
|
|
+}
|
|
|
+
|
|
|
// timeToFsTime packs t into the 64-bit form stored in inode timestamps: the
// seconds occupy the low 34 bits and the nanoseconds the bits above them.
//
// The zero time is encoded as 0. Times before -0x80000000 seconds or after
// 0x37fffffff seconds are clamped to those limits (dropping nanoseconds).
func timeToFsTime(t time.Time) uint64 {
	if t.IsZero() {
		return 0
	}
	s := t.Unix()
	if s < -0x80000000 {
		return 0x80000000
	}
	if s > 0x37fffffff {
		return 0x37fffffff
	}
	secs := uint64(s)
	if s < 0 {
		// Keep only the 32-bit two's complement form of a negative value.
		// Converting directly would sign-extend into the upper bits and
		// overwrite the nanosecond field.
		secs = uint64(uint32(s))
	}
	// NOTE(review): seconds in [0x80000000, 0xffffffff] are stored unchanged
	// and are read back as negative by fsTimeToTime; confirm whether those
	// years need to round-trip.
	return secs | uint64(t.Nanosecond())<<34
}
|
|
|
+
|
|
|
// fsTimeToTime unpacks a timestamp produced by timeToFsTime. The value 0 maps
// to the zero time.Time. Second counts in [0x80000000, 0xffffffff] are
// interpreted as negative 32-bit values.
func fsTimeToTime(t uint64) time.Time {
	if t == 0 {
		return time.Time{}
	}
	const secondsMask = 1<<34 - 1
	nsec := int64(t >> 34)
	sec := int64(t & secondsMask)
	if sec >= 0x80000000 && sec <= 0xffffffff {
		sec = int64(int32(uint32(sec)))
	}
	return time.Unix(sec, nsec)
}
|
|
|
+
|
|
|
+func (w *Writer) getInode(i format.InodeNumber) *inode {
|
|
|
+ if i == 0 || int(i) > len(w.inodes) {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ return w.inodes[i-1]
|
|
|
+}
|
|
|
+
|
|
|
// xattrPrefixes lists the name prefixes that are stored as a one-byte index
// instead of text. The order matters: compressXattrName uses the first match,
// so the specific "system.*" names must precede the generic "system." prefix.
var xattrPrefixes = []struct {
	Index  uint8
	Prefix string
}{
	{Index: 2, Prefix: "system.posix_acl_access"},
	{Index: 3, Prefix: "system.posix_acl_default"},
	{Index: 8, Prefix: "system.richacl"},
	{Index: 7, Prefix: "system."},
	{Index: 1, Prefix: "user."},
	{Index: 4, Prefix: "trusted."},
	{Index: 6, Prefix: "security."},
}

// compressXattrName splits name into a prefix index and the remaining suffix.
// Names with no known prefix get index 0 and are returned unchanged.
func compressXattrName(name string) (uint8, string) {
	for i := range xattrPrefixes {
		if p := xattrPrefixes[i]; strings.HasPrefix(name, p.Prefix) {
			return p.Index, strings.TrimPrefix(name, p.Prefix)
		}
	}
	return 0, name
}

// decompressXattrName is the inverse of compressXattrName: it prepends the
// prefix registered for index. An unknown index leaves name unchanged.
func decompressXattrName(index uint8, name string) string {
	for i := range xattrPrefixes {
		if xattrPrefixes[i].Index == index {
			return xattrPrefixes[i].Prefix + name
		}
	}
	return name
}
|
|
|
+
|
|
|
// hashXattrEntry computes the 32-bit hash stored in an xattr entry. Each name
// byte is mixed in with a 5-bit rotate; the value is then mixed in as
// little-endian 32-bit words with a 16-bit rotate, the last word being
// zero-padded if the value length is not a multiple of four.
func hashXattrEntry(name string, value []byte) uint32 {
	var h uint32
	for _, c := range []byte(name) {
		h = h<<5 ^ h>>27 ^ uint32(c)
	}

	mix := func(word uint32) {
		h = h<<16 ^ h>>16 ^ word
	}
	whole := len(value) &^ 3 // length of the part made of complete words
	for off := 0; off < whole; off += 4 {
		mix(binary.LittleEndian.Uint32(value[off : off+4]))
	}
	if tail := value[whole:]; len(tail) != 0 {
		var padded [4]byte
		copy(padded[:], tail)
		mix(binary.LittleEndian.Uint32(padded[:]))
	}
	return h
}
|
|
|
+
|
|
|
// xattr is one extended attribute with its name already split into a prefix
// index and suffix.
type xattr struct {
	// Name is the attribute name without its compressed prefix.
	Name string
	// Index identifies the compressed prefix (0 for none).
	Index uint8
	Value []byte
}

// EntryLen returns the size of the serialized entry: a 16-byte header followed
// by the name padded to a multiple of four bytes.
func (x *xattr) EntryLen() int {
	const headerLen = 16
	paddedName := (len(x.Name) + 3) &^ 3
	return paddedName + headerLen
}

// ValueLen returns the value length rounded up to a multiple of four bytes.
func (x *xattr) ValueLen() int {
	padded := len(x.Value) + 3
	return padded &^ 3
}
|
|
|
+
|
|
|
+type xattrState struct {
|
|
|
+ inode, block []xattr
|
|
|
+ inodeLeft, blockLeft int
|
|
|
+}
|
|
|
+
|
|
|
+func (s *xattrState) init() {
|
|
|
+ s.inodeLeft = inodeExtraSize - xattrInodeOverhead
|
|
|
+ s.blockLeft = blockSize - xattrBlockOverhead
|
|
|
+}
|
|
|
+
|
|
|
+func (s *xattrState) addXattr(name string, value []byte) bool {
|
|
|
+ index, name := compressXattrName(name)
|
|
|
+ x := xattr{
|
|
|
+ Index: index,
|
|
|
+ Name: name,
|
|
|
+ Value: value,
|
|
|
+ }
|
|
|
+ length := x.EntryLen() + x.ValueLen()
|
|
|
+ if s.inodeLeft >= length {
|
|
|
+ s.inode = append(s.inode, x)
|
|
|
+ s.inodeLeft -= length
|
|
|
+ } else if s.blockLeft >= length {
|
|
|
+ s.block = append(s.block, x)
|
|
|
+ s.blockLeft -= length
|
|
|
+ } else {
|
|
|
+ return false
|
|
|
+ }
|
|
|
+ return true
|
|
|
+}
|
|
|
+
|
|
|
+func putXattrs(xattrs []xattr, b []byte, offsetDelta uint16) {
|
|
|
+ offset := uint16(len(b)) + offsetDelta
|
|
|
+ eb := b
|
|
|
+ db := b
|
|
|
+ for _, xattr := range xattrs {
|
|
|
+ vl := xattr.ValueLen()
|
|
|
+ offset -= uint16(vl)
|
|
|
+ eb[0] = uint8(len(xattr.Name))
|
|
|
+ eb[1] = xattr.Index
|
|
|
+ binary.LittleEndian.PutUint16(eb[2:], offset)
|
|
|
+ binary.LittleEndian.PutUint32(eb[8:], uint32(len(xattr.Value)))
|
|
|
+ binary.LittleEndian.PutUint32(eb[12:], hashXattrEntry(xattr.Name, xattr.Value))
|
|
|
+ copy(eb[16:], xattr.Name)
|
|
|
+ eb = eb[xattr.EntryLen():]
|
|
|
+ copy(db[len(db)-vl:], xattr.Value)
|
|
|
+ db = db[:len(db)-vl]
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func getXattrs(b []byte, xattrs map[string][]byte, offsetDelta uint16) {
|
|
|
+ eb := b
|
|
|
+ for len(eb) != 0 {
|
|
|
+ nameLen := eb[0]
|
|
|
+ if nameLen == 0 {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ index := eb[1]
|
|
|
+ offset := binary.LittleEndian.Uint16(eb[2:]) - offsetDelta
|
|
|
+ valueLen := binary.LittleEndian.Uint32(eb[8:])
|
|
|
+ attr := xattr{
|
|
|
+ Index: index,
|
|
|
+ Name: string(eb[16 : 16+nameLen]),
|
|
|
+ Value: b[offset : uint32(offset)+valueLen],
|
|
|
+ }
|
|
|
+ xattrs[decompressXattrName(index, attr.Name)] = attr.Value
|
|
|
+ eb = eb[attr.EntryLen():]
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) writeXattrs(inode *inode, state *xattrState) error {
|
|
|
+ // Write the inline attributes.
|
|
|
+ if len(state.inode) != 0 {
|
|
|
+ inode.XattrInline = make([]byte, inodeExtraSize)
|
|
|
+ binary.LittleEndian.PutUint32(inode.XattrInline[0:], format.XAttrHeaderMagic) // Magic
|
|
|
+ putXattrs(state.inode, inode.XattrInline[4:], 0)
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write the block attributes. If there was previously an xattr block, then
|
|
|
+ // rewrite it even if it is now empty.
|
|
|
+ if len(state.block) != 0 || inode.XattrBlock != 0 {
|
|
|
+ sort.Slice(state.block, func(i, j int) bool {
|
|
|
+ return state.block[i].Index < state.block[j].Index ||
|
|
|
+ len(state.block[i].Name) < len(state.block[j].Name) ||
|
|
|
+ state.block[i].Name < state.block[j].Name
|
|
|
+ })
|
|
|
+
|
|
|
+ var b [blockSize]byte
|
|
|
+ binary.LittleEndian.PutUint32(b[0:], format.XAttrHeaderMagic) // Magic
|
|
|
+ binary.LittleEndian.PutUint32(b[4:], 1) // ReferenceCount
|
|
|
+ binary.LittleEndian.PutUint32(b[8:], 1) // Blocks
|
|
|
+ putXattrs(state.block, b[32:], 32)
|
|
|
+
|
|
|
+ orig := w.block()
|
|
|
+ if inode.XattrBlock == 0 {
|
|
|
+ inode.XattrBlock = orig
|
|
|
+ inode.BlockCount++
|
|
|
+ } else {
|
|
|
+ // Reuse the original block.
|
|
|
+ w.seekBlock(inode.XattrBlock)
|
|
|
+ defer w.seekBlock(orig)
|
|
|
+ }
|
|
|
+
|
|
|
+ if _, err := w.write(b[:]); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) write(b []byte) (int, error) {
|
|
|
+ if w.err != nil {
|
|
|
+ return 0, w.err
|
|
|
+ }
|
|
|
+ if w.pos+int64(len(b)) > w.maxDiskSize {
|
|
|
+ w.err = exceededMaxSizeError{w.maxDiskSize}
|
|
|
+ return 0, w.err
|
|
|
+ }
|
|
|
+ n, err := w.bw.Write(b)
|
|
|
+ w.pos += int64(n)
|
|
|
+ w.err = err
|
|
|
+ return n, err
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) zero(n int64) (int64, error) {
|
|
|
+ if w.err != nil {
|
|
|
+ return 0, w.err
|
|
|
+ }
|
|
|
+ if w.pos+int64(n) > w.maxDiskSize {
|
|
|
+ w.err = exceededMaxSizeError{w.maxDiskSize}
|
|
|
+ return 0, w.err
|
|
|
+ }
|
|
|
+ n, err := io.CopyN(w.bw, zero, n)
|
|
|
+ w.pos += n
|
|
|
+ w.err = err
|
|
|
+ return n, err
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) makeInode(f *File, node *inode) (*inode, error) {
|
|
|
+ mode := f.Mode
|
|
|
+ if mode&format.TypeMask == 0 {
|
|
|
+ mode |= format.S_IFREG
|
|
|
+ }
|
|
|
+ typ := mode & format.TypeMask
|
|
|
+ ino := format.InodeNumber(len(w.inodes) + 1)
|
|
|
+ if node == nil {
|
|
|
+ node = &inode{
|
|
|
+ Number: ino,
|
|
|
+ }
|
|
|
+ if typ == S_IFDIR {
|
|
|
+ node.Children = make(directory)
|
|
|
+ node.LinkCount = 1 // A directory is linked to itself.
|
|
|
+ }
|
|
|
+ } else if node.Flags&format.InodeFlagExtents != 0 {
|
|
|
+ // Since we cannot deallocate or reuse blocks, don't allow updates that
|
|
|
+ // would invalidate data that has already been written.
|
|
|
+ return nil, errors.New("cannot overwrite file with non-inline data")
|
|
|
+ }
|
|
|
+ node.Mode = mode
|
|
|
+ node.Uid = f.Uid
|
|
|
+ node.Gid = f.Gid
|
|
|
+ node.Flags = format.InodeFlagHugeFile
|
|
|
+ node.Atime = timeToFsTime(f.Atime)
|
|
|
+ node.Ctime = timeToFsTime(f.Ctime)
|
|
|
+ node.Mtime = timeToFsTime(f.Mtime)
|
|
|
+ node.Crtime = timeToFsTime(f.Crtime)
|
|
|
+ node.Devmajor = f.Devmajor
|
|
|
+ node.Devminor = f.Devminor
|
|
|
+ node.Data = nil
|
|
|
+ node.XattrInline = nil
|
|
|
+
|
|
|
+ var xstate xattrState
|
|
|
+ xstate.init()
|
|
|
+
|
|
|
+ var size int64
|
|
|
+ switch typ {
|
|
|
+ case format.S_IFREG:
|
|
|
+ size = f.Size
|
|
|
+ if f.Size > maxFileSize {
|
|
|
+ return nil, fmt.Errorf("file too big: %d > %d", f.Size, maxFileSize)
|
|
|
+ }
|
|
|
+ if f.Size <= inlineDataSize && w.supportInlineData {
|
|
|
+ node.Data = make([]byte, f.Size)
|
|
|
+ extra := 0
|
|
|
+ if f.Size > inodeDataSize {
|
|
|
+ extra = int(f.Size - inodeDataSize)
|
|
|
+ }
|
|
|
+ // Add a dummy entry for now.
|
|
|
+ if !xstate.addXattr("system.data", node.Data[:extra]) {
|
|
|
+ panic("not enough room for inline data")
|
|
|
+ }
|
|
|
+ node.Flags |= format.InodeFlagInlineData
|
|
|
+ }
|
|
|
+ case format.S_IFLNK:
|
|
|
+ node.Mode |= 0777 // Symlinks should appear as ugw rwx
|
|
|
+ size = int64(len(f.Linkname))
|
|
|
+ if size <= smallSymlinkSize {
|
|
|
+ // Special case: small symlinks go directly in Block without setting
|
|
|
+ // an inline data flag.
|
|
|
+ node.Data = make([]byte, len(f.Linkname))
|
|
|
+ copy(node.Data, f.Linkname)
|
|
|
+ }
|
|
|
+ case format.S_IFDIR, format.S_IFIFO, format.S_IFSOCK, format.S_IFCHR, format.S_IFBLK:
|
|
|
+ default:
|
|
|
+ return nil, fmt.Errorf("invalid mode %o", mode)
|
|
|
+ }
|
|
|
+
|
|
|
+ // Accumulate the extended attributes.
|
|
|
+ if len(f.Xattrs) != 0 {
|
|
|
+ // Sort the xattrs to avoid non-determinism in map iteration.
|
|
|
+ var xattrs []string
|
|
|
+ for name := range f.Xattrs {
|
|
|
+ xattrs = append(xattrs, name)
|
|
|
+ }
|
|
|
+ sort.Strings(xattrs)
|
|
|
+ for _, name := range xattrs {
|
|
|
+ if !xstate.addXattr(name, f.Xattrs[name]) {
|
|
|
+ return nil, fmt.Errorf("could not fit xattr %s", name)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if err := w.writeXattrs(node, &xstate); err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+
|
|
|
+ node.Size = size
|
|
|
+ if typ == format.S_IFLNK && size > smallSymlinkSize {
|
|
|
+ // Write the link name as data.
|
|
|
+ w.startInode("", node, size)
|
|
|
+ if _, err := w.Write([]byte(f.Linkname)); err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if int(node.Number-1) >= len(w.inodes) {
|
|
|
+ w.inodes = append(w.inodes, node)
|
|
|
+ }
|
|
|
+ return node, nil
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) root() *inode {
|
|
|
+ return w.getInode(format.InodeRoot)
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) lookup(name string, mustExist bool) (*inode, *inode, string, error) {
|
|
|
+ root := w.root()
|
|
|
+ cleanname := path.Clean("/" + name)[1:]
|
|
|
+ if len(cleanname) == 0 {
|
|
|
+ return root, root, "", nil
|
|
|
+ }
|
|
|
+ dirname, childname := path.Split(cleanname)
|
|
|
+ if len(childname) == 0 || len(childname) > 0xff {
|
|
|
+ return nil, nil, "", fmt.Errorf("%s: invalid name", name)
|
|
|
+ }
|
|
|
+ dir := w.findPath(root, dirname)
|
|
|
+ if dir == nil || !dir.IsDir() {
|
|
|
+ return nil, nil, "", fmt.Errorf("%s: path not found", name)
|
|
|
+ }
|
|
|
+ child := dir.Children[childname]
|
|
|
+ if child == nil && mustExist {
|
|
|
+ return nil, nil, "", fmt.Errorf("%s: file not found", name)
|
|
|
+ }
|
|
|
+ return dir, child, childname, nil
|
|
|
+}
|
|
|
+
|
|
|
+// Create adds a file to the file system.
|
|
|
+func (w *Writer) Create(name string, f *File) error {
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ dir, existing, childname, err := w.lookup(name, false)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ var reuse *inode
|
|
|
+ if existing != nil {
|
|
|
+ if existing.IsDir() {
|
|
|
+ if f.Mode&TypeMask != S_IFDIR {
|
|
|
+ return fmt.Errorf("%s: cannot replace a directory with a file", name)
|
|
|
+ }
|
|
|
+ reuse = existing
|
|
|
+ } else if f.Mode&TypeMask == S_IFDIR {
|
|
|
+ return fmt.Errorf("%s: cannot replace a file with a directory", name)
|
|
|
+ } else if existing.LinkCount < 2 {
|
|
|
+ reuse = existing
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if f.Mode&TypeMask == S_IFDIR && dir.LinkCount >= format.MaxLinks {
|
|
|
+ return fmt.Errorf("%s: exceeded parent directory maximum link count", name)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ child, err := w.makeInode(f, reuse)
|
|
|
+ if err != nil {
|
|
|
+ return fmt.Errorf("%s: %s", name, err)
|
|
|
+ }
|
|
|
+ if existing != child {
|
|
|
+ if existing != nil {
|
|
|
+ existing.LinkCount--
|
|
|
+ }
|
|
|
+ dir.Children[childname] = child
|
|
|
+ child.LinkCount++
|
|
|
+ if child.IsDir() {
|
|
|
+ dir.LinkCount++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if child.Mode&format.TypeMask == format.S_IFREG {
|
|
|
+ w.startInode(name, child, f.Size)
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Link adds a hard link to the file system.
|
|
|
+func (w *Writer) Link(oldname, newname string) error {
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ newdir, existing, newchildname, err := w.lookup(newname, false)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if existing != nil && (existing.IsDir() || existing.LinkCount < 2) {
|
|
|
+ return fmt.Errorf("%s: cannot orphan existing file or directory", newname)
|
|
|
+ }
|
|
|
+
|
|
|
+ _, oldfile, _, err := w.lookup(oldname, true)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ switch oldfile.Mode & format.TypeMask {
|
|
|
+ case format.S_IFDIR, format.S_IFLNK:
|
|
|
+ return fmt.Errorf("%s: link target cannot be a directory or symlink: %s", newname, oldname)
|
|
|
+ }
|
|
|
+
|
|
|
+ if existing != oldfile && oldfile.LinkCount >= format.MaxLinks {
|
|
|
+ return fmt.Errorf("%s: link target would exceed maximum link count: %s", newname, oldname)
|
|
|
+ }
|
|
|
+
|
|
|
+ if existing != nil {
|
|
|
+ existing.LinkCount--
|
|
|
+ }
|
|
|
+ oldfile.LinkCount++
|
|
|
+ newdir.Children[newchildname] = oldfile
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// Stat returns information about a file that has been written.
|
|
|
+func (w *Writer) Stat(name string) (*File, error) {
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ _, node, _, err := w.lookup(name, true)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ f := &File{
|
|
|
+ Size: node.Size,
|
|
|
+ Mode: node.Mode,
|
|
|
+ Uid: node.Uid,
|
|
|
+ Gid: node.Gid,
|
|
|
+ Atime: fsTimeToTime(node.Atime),
|
|
|
+ Ctime: fsTimeToTime(node.Ctime),
|
|
|
+ Mtime: fsTimeToTime(node.Mtime),
|
|
|
+ Crtime: fsTimeToTime(node.Crtime),
|
|
|
+ Devmajor: node.Devmajor,
|
|
|
+ Devminor: node.Devminor,
|
|
|
+ }
|
|
|
+ f.Xattrs = make(map[string][]byte)
|
|
|
+ if node.XattrBlock != 0 || len(node.XattrInline) != 0 {
|
|
|
+ if node.XattrBlock != 0 {
|
|
|
+ orig := w.block()
|
|
|
+ w.seekBlock(node.XattrBlock)
|
|
|
+ if w.err != nil {
|
|
|
+ return nil, w.err
|
|
|
+ }
|
|
|
+ var b [blockSize]byte
|
|
|
+ _, err := w.f.Read(b[:])
|
|
|
+ w.seekBlock(orig)
|
|
|
+ if err != nil {
|
|
|
+ return nil, err
|
|
|
+ }
|
|
|
+ getXattrs(b[32:], f.Xattrs, 32)
|
|
|
+ }
|
|
|
+ if len(node.XattrInline) != 0 {
|
|
|
+ getXattrs(node.XattrInline[4:], f.Xattrs, 0)
|
|
|
+ delete(f.Xattrs, "system.data")
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if node.FileType() == S_IFLNK {
|
|
|
+ if node.Size > smallSymlinkSize {
|
|
|
+ return nil, fmt.Errorf("%s: cannot retrieve link information", name)
|
|
|
+ }
|
|
|
+ f.Linkname = string(node.Data)
|
|
|
+ }
|
|
|
+ return f, nil
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) Write(b []byte) (int, error) {
|
|
|
+ if len(b) == 0 {
|
|
|
+ return 0, nil
|
|
|
+ }
|
|
|
+ if w.dataWritten+int64(len(b)) > w.dataMax {
|
|
|
+ return 0, fmt.Errorf("%s: wrote too much: %d > %d", w.curName, w.dataWritten+int64(len(b)), w.dataMax)
|
|
|
+ }
|
|
|
+
|
|
|
+ if w.curInode.Flags&format.InodeFlagInlineData != 0 {
|
|
|
+ copy(w.curInode.Data[w.dataWritten:], b)
|
|
|
+ w.dataWritten += int64(len(b))
|
|
|
+ return len(b), nil
|
|
|
+ }
|
|
|
+
|
|
|
+ n, err := w.write(b)
|
|
|
+ w.dataWritten += int64(n)
|
|
|
+ return n, err
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) startInode(name string, inode *inode, size int64) {
|
|
|
+ if w.curInode != nil {
|
|
|
+ panic("inode already in progress")
|
|
|
+ }
|
|
|
+ w.curName = name
|
|
|
+ w.curInode = inode
|
|
|
+ w.dataWritten = 0
|
|
|
+ w.dataMax = size
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) block() uint32 {
|
|
|
+ return uint32(w.pos / blockSize)
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) seekBlock(block uint32) {
|
|
|
+ w.pos = int64(block) * blockSize
|
|
|
+ if w.err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ w.err = w.bw.Flush()
|
|
|
+ if w.err != nil {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ _, w.err = w.f.Seek(w.pos, io.SeekStart)
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) nextBlock() {
|
|
|
+ if w.pos%blockSize != 0 {
|
|
|
+ // Simplify callers; w.err is updated on failure.
|
|
|
+ w.zero(blockSize - w.pos%blockSize)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func fillExtents(hdr *format.ExtentHeader, extents []format.ExtentLeafNode, startBlock, offset, inodeSize uint32) {
|
|
|
+ *hdr = format.ExtentHeader{
|
|
|
+ Magic: format.ExtentHeaderMagic,
|
|
|
+ Entries: uint16(len(extents)),
|
|
|
+ Max: uint16(cap(extents)),
|
|
|
+ Depth: 0,
|
|
|
+ }
|
|
|
+ for i := range extents {
|
|
|
+ block := offset + uint32(i)*maxBlocksPerExtent
|
|
|
+ length := inodeSize - block
|
|
|
+ if length > maxBlocksPerExtent {
|
|
|
+ length = maxBlocksPerExtent
|
|
|
+ }
|
|
|
+ start := startBlock + block
|
|
|
+ extents[i] = format.ExtentLeafNode{
|
|
|
+ Block: block,
|
|
|
+ Length: uint16(length),
|
|
|
+ StartLow: start,
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) writeExtents(inode *inode) error {
|
|
|
+ start := w.pos - w.dataWritten
|
|
|
+ if start%blockSize != 0 {
|
|
|
+ panic("unaligned")
|
|
|
+ }
|
|
|
+ w.nextBlock()
|
|
|
+
|
|
|
+ startBlock := uint32(start / blockSize)
|
|
|
+ blocks := w.block() - startBlock
|
|
|
+ usedBlocks := blocks
|
|
|
+
|
|
|
+ const extentNodeSize = 12
|
|
|
+ const extentsPerBlock = blockSize/extentNodeSize - 1
|
|
|
+
|
|
|
+ extents := (blocks + maxBlocksPerExtent - 1) / maxBlocksPerExtent
|
|
|
+ var b bytes.Buffer
|
|
|
+ if extents == 0 {
|
|
|
+ // Nothing to do.
|
|
|
+ } else if extents <= 4 {
|
|
|
+ var root struct {
|
|
|
+ hdr format.ExtentHeader
|
|
|
+ extents [4]format.ExtentLeafNode
|
|
|
+ }
|
|
|
+ fillExtents(&root.hdr, root.extents[:extents], startBlock, 0, blocks)
|
|
|
+ binary.Write(&b, binary.LittleEndian, root)
|
|
|
+ } else if extents <= 4*extentsPerBlock {
|
|
|
+ const extentsPerBlock = blockSize/extentNodeSize - 1
|
|
|
+ extentBlocks := extents/extentsPerBlock + 1
|
|
|
+ usedBlocks += extentBlocks
|
|
|
+ var b2 bytes.Buffer
|
|
|
+
|
|
|
+ var root struct {
|
|
|
+ hdr format.ExtentHeader
|
|
|
+ nodes [4]format.ExtentIndexNode
|
|
|
+ }
|
|
|
+ root.hdr = format.ExtentHeader{
|
|
|
+ Magic: format.ExtentHeaderMagic,
|
|
|
+ Entries: uint16(extentBlocks),
|
|
|
+ Max: 4,
|
|
|
+ Depth: 1,
|
|
|
+ }
|
|
|
+ for i := uint32(0); i < extentBlocks; i++ {
|
|
|
+ root.nodes[i] = format.ExtentIndexNode{
|
|
|
+ Block: i * extentsPerBlock * maxBlocksPerExtent,
|
|
|
+ LeafLow: w.block(),
|
|
|
+ }
|
|
|
+ extentsInBlock := extents - i*extentBlocks
|
|
|
+ if extentsInBlock > extentsPerBlock {
|
|
|
+ extentsInBlock = extentsPerBlock
|
|
|
+ }
|
|
|
+
|
|
|
+ var node struct {
|
|
|
+ hdr format.ExtentHeader
|
|
|
+ extents [extentsPerBlock]format.ExtentLeafNode
|
|
|
+ _ [blockSize - (extentsPerBlock+1)*extentNodeSize]byte
|
|
|
+ }
|
|
|
+
|
|
|
+ offset := i * extentsPerBlock * maxBlocksPerExtent
|
|
|
+ fillExtents(&node.hdr, node.extents[:extentsInBlock], startBlock+offset, offset, blocks)
|
|
|
+ binary.Write(&b2, binary.LittleEndian, node)
|
|
|
+ if _, err := w.write(b2.Next(blockSize)); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ binary.Write(&b, binary.LittleEndian, root)
|
|
|
+ } else {
|
|
|
+ panic("file too big")
|
|
|
+ }
|
|
|
+
|
|
|
+ inode.Data = b.Bytes()
|
|
|
+ inode.Flags |= format.InodeFlagExtents
|
|
|
+ inode.BlockCount += usedBlocks
|
|
|
+ return w.err
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) finishInode() error {
|
|
|
+ if !w.initialized {
|
|
|
+ if err := w.init(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if w.curInode == nil {
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ if w.dataWritten != w.dataMax {
|
|
|
+ return fmt.Errorf("did not write the right amount: %d != %d", w.dataWritten, w.dataMax)
|
|
|
+ }
|
|
|
+
|
|
|
+ if w.dataMax != 0 && w.curInode.Flags&format.InodeFlagInlineData == 0 {
|
|
|
+ if err := w.writeExtents(w.curInode); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ w.dataWritten = 0
|
|
|
+ w.dataMax = 0
|
|
|
+ w.curInode = nil
|
|
|
+ return w.err
|
|
|
+}
|
|
|
+
|
|
|
+func modeToFileType(mode uint16) format.FileType {
|
|
|
+ switch mode & format.TypeMask {
|
|
|
+ default:
|
|
|
+ return format.FileTypeUnknown
|
|
|
+ case format.S_IFREG:
|
|
|
+ return format.FileTypeRegular
|
|
|
+ case format.S_IFDIR:
|
|
|
+ return format.FileTypeDirectory
|
|
|
+ case format.S_IFCHR:
|
|
|
+ return format.FileTypeCharacter
|
|
|
+ case format.S_IFBLK:
|
|
|
+ return format.FileTypeBlock
|
|
|
+ case format.S_IFIFO:
|
|
|
+ return format.FileTypeFIFO
|
|
|
+ case format.S_IFSOCK:
|
|
|
+ return format.FileTypeSocket
|
|
|
+ case format.S_IFLNK:
|
|
|
+ return format.FileTypeSymbolicLink
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
// constReader is an endless reader that yields a single repeated byte.
type constReader byte

// zero is a reader that yields only zero bytes.
var zero = constReader(0)

// Read fills all of b with the reader's byte. It never returns an error.
func (r constReader) Read(b []byte) (int, error) {
	fill := byte(r)
	for i := 0; i < len(b); i++ {
		b[i] = fill
	}
	return len(b), nil
}
|
|
|
+
|
|
|
+func (w *Writer) writeDirectory(dir, parent *inode) error {
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // The size of the directory is not known yet.
|
|
|
+ w.startInode("", dir, 0x7fffffffffffffff)
|
|
|
+ left := blockSize
|
|
|
+ finishBlock := func() error {
|
|
|
+ if left > 0 {
|
|
|
+ e := format.DirectoryEntry{
|
|
|
+ RecordLength: uint16(left),
|
|
|
+ }
|
|
|
+ err := binary.Write(w, binary.LittleEndian, e)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ left -= directoryEntrySize
|
|
|
+ if left < 4 {
|
|
|
+ panic("not enough space for trailing entry")
|
|
|
+ }
|
|
|
+ _, err = io.CopyN(w, zero, int64(left))
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ left = blockSize
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+
|
|
|
+ writeEntry := func(ino format.InodeNumber, name string) error {
|
|
|
+ rlb := directoryEntrySize + len(name)
|
|
|
+ rl := (rlb + 3) & ^3
|
|
|
+ if left < rl+12 {
|
|
|
+ if err := finishBlock(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ e := format.DirectoryEntry{
|
|
|
+ Inode: ino,
|
|
|
+ RecordLength: uint16(rl),
|
|
|
+ NameLength: uint8(len(name)),
|
|
|
+ FileType: modeToFileType(w.getInode(ino).Mode),
|
|
|
+ }
|
|
|
+ err := binary.Write(w, binary.LittleEndian, e)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ _, err = w.Write([]byte(name))
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ var zero [4]byte
|
|
|
+ _, err = w.Write(zero[:rl-rlb])
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ left -= rl
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ if err := writeEntry(dir.Number, "."); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ if err := writeEntry(parent.Number, ".."); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Follow e2fsck's convention and sort the children by inode number.
|
|
|
+ var children []string
|
|
|
+ for name := range dir.Children {
|
|
|
+ children = append(children, name)
|
|
|
+ }
|
|
|
+ sort.Slice(children, func(i, j int) bool {
|
|
|
+ return dir.Children[children[i]].Number < dir.Children[children[j]].Number
|
|
|
+ })
|
|
|
+
|
|
|
+ for _, name := range children {
|
|
|
+ child := dir.Children[name]
|
|
|
+ if err := writeEntry(child.Number, name); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if err := finishBlock(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ w.curInode.Size = w.dataWritten
|
|
|
+ w.dataMax = w.dataWritten
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) writeDirectoryRecursive(dir, parent *inode) error {
|
|
|
+ if err := w.writeDirectory(dir, parent); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ for _, child := range dir.Children {
|
|
|
+ if child.IsDir() {
|
|
|
+ if err := w.writeDirectoryRecursive(child, dir); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) writeInodeTable(tableSize uint32) error {
|
|
|
+ var b bytes.Buffer
|
|
|
+ for _, inode := range w.inodes {
|
|
|
+ if inode != nil {
|
|
|
+ binode := format.Inode{
|
|
|
+ Mode: inode.Mode,
|
|
|
+ Uid: uint16(inode.Uid & 0xffff),
|
|
|
+ Gid: uint16(inode.Gid & 0xffff),
|
|
|
+ SizeLow: uint32(inode.Size & 0xffffffff),
|
|
|
+ SizeHigh: uint32(inode.Size >> 32),
|
|
|
+ LinksCount: uint16(inode.LinkCount),
|
|
|
+ BlocksLow: inode.BlockCount,
|
|
|
+ Flags: inode.Flags,
|
|
|
+ XattrBlockLow: inode.XattrBlock,
|
|
|
+ UidHigh: uint16(inode.Uid >> 16),
|
|
|
+ GidHigh: uint16(inode.Gid >> 16),
|
|
|
+ ExtraIsize: uint16(inodeUsedSize - 128),
|
|
|
+ Atime: uint32(inode.Atime),
|
|
|
+ AtimeExtra: uint32(inode.Atime >> 32),
|
|
|
+ Ctime: uint32(inode.Ctime),
|
|
|
+ CtimeExtra: uint32(inode.Ctime >> 32),
|
|
|
+ Mtime: uint32(inode.Mtime),
|
|
|
+ MtimeExtra: uint32(inode.Mtime >> 32),
|
|
|
+ Crtime: uint32(inode.Crtime),
|
|
|
+ CrtimeExtra: uint32(inode.Crtime >> 32),
|
|
|
+ }
|
|
|
+ switch inode.Mode & format.TypeMask {
|
|
|
+ case format.S_IFDIR, format.S_IFREG, format.S_IFLNK:
|
|
|
+ n := copy(binode.Block[:], inode.Data)
|
|
|
+ if n < len(inode.Data) {
|
|
|
+ // Rewrite the first xattr with the data.
|
|
|
+ xattr := [1]xattr{{
|
|
|
+ Name: "data",
|
|
|
+ Index: 7, // "system."
|
|
|
+ Value: inode.Data[n:],
|
|
|
+ }}
|
|
|
+ putXattrs(xattr[:], inode.XattrInline[4:], 0)
|
|
|
+ }
|
|
|
+ case format.S_IFBLK, format.S_IFCHR:
|
|
|
+ dev := inode.Devminor&0xff | inode.Devmajor<<8 | (inode.Devminor&0xffffff00)<<12
|
|
|
+ binary.LittleEndian.PutUint32(binode.Block[4:], dev)
|
|
|
+ }
|
|
|
+
|
|
|
+ binary.Write(&b, binary.LittleEndian, binode)
|
|
|
+ b.Truncate(inodeUsedSize)
|
|
|
+ n, _ := b.Write(inode.XattrInline)
|
|
|
+ io.CopyN(&b, zero, int64(inodeExtraSize-n))
|
|
|
+ } else {
|
|
|
+ io.CopyN(&b, zero, inodeSize)
|
|
|
+ }
|
|
|
+ if _, err := w.write(b.Next(inodeSize)); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ }
|
|
|
+ rest := tableSize - uint32(len(w.inodes)*inodeSize)
|
|
|
+ if _, err := w.zero(int64(rest)); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return nil
|
|
|
+}
|
|
|
+
|
|
|
+// NewWriter returns a Writer that writes an ext4 file system to the provided
|
|
|
+// WriteSeeker.
|
|
|
+func NewWriter(f io.ReadWriteSeeker, opts ...Option) *Writer {
|
|
|
+ w := &Writer{
|
|
|
+ f: f,
|
|
|
+ bw: bufio.NewWriterSize(f, 65536*8),
|
|
|
+ maxDiskSize: defaultMaxDiskSize,
|
|
|
+ }
|
|
|
+ for _, opt := range opts {
|
|
|
+ opt(w)
|
|
|
+ }
|
|
|
+ return w
|
|
|
+}
|
|
|
+
|
|
|
+// An Option provides extra options to NewWriter.
|
|
|
+type Option func(*Writer)
|
|
|
+
|
|
|
+// InlineData is an Option that instructs the Writer to write small files into
|
|
|
+// the inode structures directly. This creates smaller images, but the result
|
|
|
+// is currently not compatible with DAX.
|
|
|
+func InlineData(w *Writer) {
|
|
|
+ w.supportInlineData = true
|
|
|
+}
|
|
|
+
|
|
|
+// MaximumDiskSize instructs the writer to reserve enough metadata space for the
|
|
|
+// specified disk size. If not provided, then 16GB is the default.
|
|
|
+func MaximumDiskSize(size int64) Option {
|
|
|
+ return func(w *Writer) {
|
|
|
+ if size < 0 || size > maxMaxDiskSize {
|
|
|
+ w.maxDiskSize = maxMaxDiskSize
|
|
|
+ } else if size == 0 {
|
|
|
+ w.maxDiskSize = defaultMaxDiskSize
|
|
|
+ } else {
|
|
|
+ w.maxDiskSize = (size + blockSize - 1) &^ (blockSize - 1)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) init() error {
|
|
|
+ // Skip the defective block inode.
|
|
|
+ w.inodes = make([]*inode, 1, 32)
|
|
|
+ // Create the root directory.
|
|
|
+ root, _ := w.makeInode(&File{
|
|
|
+ Mode: format.S_IFDIR | 0755,
|
|
|
+ }, nil)
|
|
|
+ root.LinkCount++ // The root is linked to itself.
|
|
|
+ // Skip until the first non-reserved inode.
|
|
|
+ w.inodes = append(w.inodes, make([]*inode, inodeFirst-len(w.inodes)-1)...)
|
|
|
+ maxBlocks := (w.maxDiskSize-1)/blockSize + 1
|
|
|
+ maxGroups := (maxBlocks-1)/blocksPerGroup + 1
|
|
|
+ w.gdBlocks = uint32((maxGroups-1)/groupsPerDescriptorBlock + 1)
|
|
|
+
|
|
|
+ // Skip past the superblock and block descriptor table.
|
|
|
+ w.seekBlock(1 + w.gdBlocks)
|
|
|
+ w.initialized = true
|
|
|
+
|
|
|
+ // The lost+found directory is required to exist for e2fsck to pass.
|
|
|
+ if err := w.Create("lost+found", &File{Mode: format.S_IFDIR | 0700}); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ return w.err
|
|
|
+}
|
|
|
+
|
|
|
+func groupCount(blocks uint32, inodes uint32, inodesPerGroup uint32) uint32 {
|
|
|
+ inodeBlocksPerGroup := inodesPerGroup * inodeSize / blockSize
|
|
|
+ dataBlocksPerGroup := blocksPerGroup - inodeBlocksPerGroup - 2 // save room for the bitmaps
|
|
|
+
|
|
|
+ // Increase the block count to ensure there are enough groups for all the
|
|
|
+ // inodes.
|
|
|
+ minBlocks := (inodes-1)/inodesPerGroup*dataBlocksPerGroup + 1
|
|
|
+ if blocks < minBlocks {
|
|
|
+ blocks = minBlocks
|
|
|
+ }
|
|
|
+
|
|
|
+ return (blocks + dataBlocksPerGroup - 1) / dataBlocksPerGroup
|
|
|
+}
|
|
|
+
|
|
|
+func bestGroupCount(blocks uint32, inodes uint32) (groups uint32, inodesPerGroup uint32) {
|
|
|
+ groups = 0xffffffff
|
|
|
+ for ipg := uint32(inodesPerGroupIncrement); ipg <= maxInodesPerGroup; ipg += inodesPerGroupIncrement {
|
|
|
+ g := groupCount(blocks, inodes, ipg)
|
|
|
+ if g < groups {
|
|
|
+ groups = g
|
|
|
+ inodesPerGroup = ipg
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return
|
|
|
+}
|
|
|
+
|
|
|
+func (w *Writer) Close() error {
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ root := w.root()
|
|
|
+ if err := w.writeDirectoryRecursive(root, root); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ // Finish the last inode (probably a directory).
|
|
|
+ if err := w.finishInode(); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write the inode table
|
|
|
+ inodeTableOffset := w.block()
|
|
|
+ groups, inodesPerGroup := bestGroupCount(inodeTableOffset, uint32(len(w.inodes)))
|
|
|
+ err := w.writeInodeTable(groups * inodesPerGroup * inodeSize)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write the bitmaps.
|
|
|
+ bitmapOffset := w.block()
|
|
|
+ bitmapSize := groups * 2
|
|
|
+ validDataSize := bitmapOffset + bitmapSize
|
|
|
+ diskSize := validDataSize
|
|
|
+ minSize := (groups-1)*blocksPerGroup + 1
|
|
|
+ if diskSize < minSize {
|
|
|
+ diskSize = minSize
|
|
|
+ }
|
|
|
+
|
|
|
+ usedGdBlocks := (groups-1)/groupDescriptorSize + 1
|
|
|
+ if usedGdBlocks > w.gdBlocks {
|
|
|
+ return exceededMaxSizeError{w.maxDiskSize}
|
|
|
+ }
|
|
|
+
|
|
|
+ gds := make([]format.GroupDescriptor, w.gdBlocks*groupsPerDescriptorBlock)
|
|
|
+ inodeTableSizePerGroup := inodesPerGroup * inodeSize / blockSize
|
|
|
+ var totalUsedBlocks, totalUsedInodes uint32
|
|
|
+ for g := uint32(0); g < groups; g++ {
|
|
|
+ var b [blockSize * 2]byte
|
|
|
+ var dirCount, usedInodeCount, usedBlockCount uint16
|
|
|
+
|
|
|
+ // Block bitmap
|
|
|
+ if (g+1)*blocksPerGroup <= validDataSize {
|
|
|
+ // This group is fully allocated.
|
|
|
+ for j := range b[:blockSize] {
|
|
|
+ b[j] = 0xff
|
|
|
+ }
|
|
|
+ usedBlockCount = blocksPerGroup
|
|
|
+ } else if g*blocksPerGroup < validDataSize {
|
|
|
+ for j := uint32(0); j < validDataSize-g*blocksPerGroup; j++ {
|
|
|
+ b[j/8] |= 1 << (j % 8)
|
|
|
+ usedBlockCount++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if g == 0 {
|
|
|
+ // Unused group descriptor blocks should be cleared.
|
|
|
+ for j := 1 + usedGdBlocks; j < 1+w.gdBlocks; j++ {
|
|
|
+ b[j/8] &^= 1 << (j % 8)
|
|
|
+ usedBlockCount--
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if g == groups-1 && diskSize%blocksPerGroup != 0 {
|
|
|
+ // Blocks that aren't present in the disk should be marked as
|
|
|
+ // allocated.
|
|
|
+ for j := diskSize % blocksPerGroup; j < blocksPerGroup; j++ {
|
|
|
+ b[j/8] |= 1 << (j % 8)
|
|
|
+ usedBlockCount++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // Inode bitmap
|
|
|
+ for j := uint32(0); j < inodesPerGroup; j++ {
|
|
|
+ ino := format.InodeNumber(1 + g*inodesPerGroup + j)
|
|
|
+ inode := w.getInode(ino)
|
|
|
+ if ino < inodeFirst || inode != nil {
|
|
|
+ b[blockSize+j/8] |= 1 << (j % 8)
|
|
|
+ usedInodeCount++
|
|
|
+ }
|
|
|
+ if inode != nil && inode.Mode&format.TypeMask == format.S_IFDIR {
|
|
|
+ dirCount++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ _, err := w.write(b[:])
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ gds[g] = format.GroupDescriptor{
|
|
|
+ BlockBitmapLow: bitmapOffset + 2*g,
|
|
|
+ InodeBitmapLow: bitmapOffset + 2*g + 1,
|
|
|
+ InodeTableLow: inodeTableOffset + g*inodeTableSizePerGroup,
|
|
|
+ UsedDirsCountLow: dirCount,
|
|
|
+ FreeInodesCountLow: uint16(inodesPerGroup) - usedInodeCount,
|
|
|
+ FreeBlocksCountLow: blocksPerGroup - usedBlockCount,
|
|
|
+ }
|
|
|
+
|
|
|
+ totalUsedBlocks += uint32(usedBlockCount)
|
|
|
+ totalUsedInodes += uint32(usedInodeCount)
|
|
|
+ }
|
|
|
+
|
|
|
+ // Zero up to the disk size.
|
|
|
+ _, err = w.zero(int64(diskSize-bitmapOffset-bitmapSize) * blockSize)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write the block descriptors
|
|
|
+ w.seekBlock(1)
|
|
|
+ if w.err != nil {
|
|
|
+ return w.err
|
|
|
+ }
|
|
|
+ err = binary.Write(w.bw, binary.LittleEndian, gds)
|
|
|
+ if err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+
|
|
|
+ // Write the super block
|
|
|
+ var blk [blockSize]byte
|
|
|
+ b := bytes.NewBuffer(blk[:1024])
|
|
|
+ sb := &format.SuperBlock{
|
|
|
+ InodesCount: inodesPerGroup * groups,
|
|
|
+ BlocksCountLow: diskSize,
|
|
|
+ FreeBlocksCountLow: blocksPerGroup*groups - totalUsedBlocks,
|
|
|
+ FreeInodesCount: inodesPerGroup*groups - totalUsedInodes,
|
|
|
+ FirstDataBlock: 0,
|
|
|
+ LogBlockSize: 2, // 2^(10 + 2)
|
|
|
+ LogClusterSize: 2,
|
|
|
+ BlocksPerGroup: blocksPerGroup,
|
|
|
+ ClustersPerGroup: blocksPerGroup,
|
|
|
+ InodesPerGroup: inodesPerGroup,
|
|
|
+ Magic: format.SuperBlockMagic,
|
|
|
+ State: 1, // cleanly unmounted
|
|
|
+ Errors: 1, // continue on error?
|
|
|
+ CreatorOS: 0, // Linux
|
|
|
+ RevisionLevel: 1, // dynamic inode sizes
|
|
|
+ FirstInode: inodeFirst,
|
|
|
+ LpfInode: inodeLostAndFound,
|
|
|
+ InodeSize: inodeSize,
|
|
|
+ FeatureCompat: format.CompatSparseSuper2 | format.CompatExtAttr,
|
|
|
+ FeatureIncompat: format.IncompatFiletype | format.IncompatExtents | format.IncompatFlexBg,
|
|
|
+ FeatureRoCompat: format.RoCompatLargeFile | format.RoCompatHugeFile | format.RoCompatExtraIsize | format.RoCompatReadonly,
|
|
|
+ MinExtraIsize: extraIsize,
|
|
|
+ WantExtraIsize: extraIsize,
|
|
|
+ LogGroupsPerFlex: 31,
|
|
|
+ }
|
|
|
+ if w.supportInlineData {
|
|
|
+ sb.FeatureIncompat |= format.IncompatInlineData
|
|
|
+ }
|
|
|
+ binary.Write(b, binary.LittleEndian, sb)
|
|
|
+ w.seekBlock(0)
|
|
|
+ if _, err := w.write(blk[:]); err != nil {
|
|
|
+ return err
|
|
|
+ }
|
|
|
+ w.seekBlock(diskSize)
|
|
|
+ return w.err
|
|
|
+}
|