tar.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. //go:build windows
  2. // +build windows
  3. package backuptar
  4. import (
  5. "archive/tar"
  6. "encoding/base64"
  7. "fmt"
  8. "io"
  9. "path/filepath"
  10. "strconv"
  11. "strings"
  12. "syscall"
  13. "time"
  14. "github.com/Microsoft/go-winio"
  15. "golang.org/x/sys/windows"
  16. )
  // Unix-style mode bits, written in octal, that this package ORs into
  // tar.Header.Mode. The first three are the set-id/sticky bits; the remainder
  // identify the file type.
  //
  //nolint:deadcode,varcheck // keep unused constants for potential future use
  const (
  	cISUID  = 0o4000   // Set uid
  	cISGID  = 0o2000   // Set gid
  	cISVTX  = 0o1000   // Save text (sticky bit)
  	cISDIR  = 0o40000  // Directory
  	cISFIFO = 0o10000  // FIFO
  	cISREG  = 0o100000 // Regular file
  	cISLNK  = 0o120000 // Symbolic link
  	cISBLK  = 0o60000  // Block special file
  	cISCHR  = 0o20000  // Character special file
  	cISSOCK = 0o140000 // Socket
  )
  // PAX record keys in the MSWINDOWS vendor namespace used to carry Win32
  // metadata in a tar header.
  const (
  	// hdrFileAttributes holds the Win32 file attributes as a decimal string.
  	hdrFileAttributes = "MSWINDOWS.fileattr"
  	// hdrSecurityDescriptor holds a security descriptor in SDDL form. It is
  	// only read, for compatibility with older archives.
  	hdrSecurityDescriptor = "MSWINDOWS.sd"
  	// hdrRawSecurityDescriptor holds the base64-encoded binary security descriptor.
  	hdrRawSecurityDescriptor = "MSWINDOWS.rawsd"
  	// hdrMountPoint, when present, marks a symlink entry as a mount point.
  	hdrMountPoint = "MSWINDOWS.mountpoint"
  	// hdrEaPrefix is prepended to an extended attribute's name to form its
  	// record key; the record value is the base64-encoded attribute value.
  	hdrEaPrefix = "MSWINDOWS.xattr."
  )

  // hdrCreationTime is the PAX record key that holds the file creation time.
  const hdrCreationTime = "LIBARCHIVE.creationtime"
  // zeroReader is an endless io.Reader whose every byte is zero.
  type zeroReader struct{}

  // Read overwrites all of p with zero bytes and reports the whole slice as
  // read. It never returns an error.
  func (zeroReader) Read(p []byte) (int, error) {
  	for idx := range p {
  		p[idx] = 0
  	}
  	filled := len(p)
  	return filled, nil
  }
  46. func copySparse(t *tar.Writer, br *winio.BackupStreamReader) error {
  47. curOffset := int64(0)
  48. for {
  49. bhdr, err := br.Next()
  50. if err == io.EOF { //nolint:errorlint
  51. err = io.ErrUnexpectedEOF
  52. }
  53. if err != nil {
  54. return err
  55. }
  56. if bhdr.Id != winio.BackupSparseBlock {
  57. return fmt.Errorf("unexpected stream %d", bhdr.Id)
  58. }
  59. // We can't seek backwards, since we have already written that data to the tar.Writer.
  60. if bhdr.Offset < curOffset {
  61. return fmt.Errorf("cannot seek back from %d to %d", curOffset, bhdr.Offset)
  62. }
  63. // archive/tar does not support writing sparse files
  64. // so just write zeroes to catch up to the current offset.
  65. if _, err = io.CopyN(t, zeroReader{}, bhdr.Offset-curOffset); err != nil {
  66. return fmt.Errorf("seek to offset %d: %w", bhdr.Offset, err)
  67. }
  68. if bhdr.Size == 0 {
  69. // A sparse block with size = 0 is used to mark the end of the sparse blocks.
  70. break
  71. }
  72. n, err := io.Copy(t, br)
  73. if err != nil {
  74. return err
  75. }
  76. if n != bhdr.Size {
  77. return fmt.Errorf("copied %d bytes instead of %d at offset %d", n, bhdr.Size, bhdr.Offset)
  78. }
  79. curOffset = bhdr.Offset + n
  80. }
  81. return nil
  82. }
  83. // BasicInfoHeader creates a tar header from basic file information.
  84. func BasicInfoHeader(name string, size int64, fileInfo *winio.FileBasicInfo) *tar.Header {
  85. hdr := &tar.Header{
  86. Format: tar.FormatPAX,
  87. Name: filepath.ToSlash(name),
  88. Size: size,
  89. Typeflag: tar.TypeReg,
  90. ModTime: time.Unix(0, fileInfo.LastWriteTime.Nanoseconds()),
  91. ChangeTime: time.Unix(0, fileInfo.ChangeTime.Nanoseconds()),
  92. AccessTime: time.Unix(0, fileInfo.LastAccessTime.Nanoseconds()),
  93. PAXRecords: make(map[string]string),
  94. }
  95. hdr.PAXRecords[hdrFileAttributes] = fmt.Sprintf("%d", fileInfo.FileAttributes)
  96. hdr.PAXRecords[hdrCreationTime] = formatPAXTime(time.Unix(0, fileInfo.CreationTime.Nanoseconds()))
  97. if (fileInfo.FileAttributes & syscall.FILE_ATTRIBUTE_DIRECTORY) != 0 {
  98. hdr.Mode |= cISDIR
  99. hdr.Size = 0
  100. hdr.Typeflag = tar.TypeDir
  101. }
  102. return hdr
  103. }
  104. // SecurityDescriptorFromTarHeader reads the SDDL associated with the header of the current file
  105. // from the tar header and returns the security descriptor into a byte slice.
  106. func SecurityDescriptorFromTarHeader(hdr *tar.Header) ([]byte, error) {
  107. if sdraw, ok := hdr.PAXRecords[hdrRawSecurityDescriptor]; ok {
  108. sd, err := base64.StdEncoding.DecodeString(sdraw)
  109. if err != nil {
  110. // Not returning sd as-is in the error-case, as base64.DecodeString
  111. // may return partially decoded data (not nil or empty slice) in case
  112. // of a failure: https://github.com/golang/go/blob/go1.17.7/src/encoding/base64/base64.go#L382-L387
  113. return nil, err
  114. }
  115. return sd, nil
  116. }
  117. // Maintaining old SDDL-based behavior for backward compatibility. All new
  118. // tar headers written by this library will have raw binary for the security
  119. // descriptor.
  120. if sddl, ok := hdr.PAXRecords[hdrSecurityDescriptor]; ok {
  121. return winio.SddlToSecurityDescriptor(sddl)
  122. }
  123. return nil, nil
  124. }
  125. // ExtendedAttributesFromTarHeader reads the EAs associated with the header of the
  126. // current file from the tar header and returns it as a byte slice.
  127. func ExtendedAttributesFromTarHeader(hdr *tar.Header) ([]byte, error) {
  128. var eas []winio.ExtendedAttribute //nolint:prealloc // len(eas) <= len(hdr.PAXRecords); prealloc is wasteful
  129. for k, v := range hdr.PAXRecords {
  130. if !strings.HasPrefix(k, hdrEaPrefix) {
  131. continue
  132. }
  133. data, err := base64.StdEncoding.DecodeString(v)
  134. if err != nil {
  135. return nil, err
  136. }
  137. eas = append(eas, winio.ExtendedAttribute{
  138. Name: k[len(hdrEaPrefix):],
  139. Value: data,
  140. })
  141. }
  142. var eaData []byte
  143. var err error
  144. if len(eas) != 0 {
  145. eaData, err = winio.EncodeExtendedAttributes(eas)
  146. if err != nil {
  147. return nil, err
  148. }
  149. }
  150. return eaData, nil
  151. }
  152. // EncodeReparsePointFromTarHeader reads the ReparsePoint structure from the tar header
  153. // and encodes it into a byte slice. The file for which this function is called must be a
  154. // symlink.
  155. func EncodeReparsePointFromTarHeader(hdr *tar.Header) []byte {
  156. _, isMountPoint := hdr.PAXRecords[hdrMountPoint]
  157. rp := winio.ReparsePoint{
  158. Target: filepath.FromSlash(hdr.Linkname),
  159. IsMountPoint: isMountPoint,
  160. }
  161. return winio.EncodeReparsePoint(&rp)
  162. }
  163. // WriteTarFileFromBackupStream writes a file to a tar writer using data from a Win32 backup stream.
  164. //
  165. // This encodes Win32 metadata as tar pax vendor extensions starting with MSWINDOWS.
  166. //
  167. // The additional Win32 metadata is:
  168. //
  169. // - MSWINDOWS.fileattr: The Win32 file attributes, as a decimal value
  170. // - MSWINDOWS.rawsd: The Win32 security descriptor, in raw binary format
  171. // - MSWINDOWS.mountpoint: If present, this is a mount point and not a symlink, even though the type is '2' (symlink)
  172. func WriteTarFileFromBackupStream(t *tar.Writer, r io.Reader, name string, size int64, fileInfo *winio.FileBasicInfo) error {
  173. name = filepath.ToSlash(name)
  174. hdr := BasicInfoHeader(name, size, fileInfo)
  175. // If r can be seeked, then this function is two-pass: pass 1 collects the
  176. // tar header data, and pass 2 copies the data stream. If r cannot be
  177. // seeked, then some header data (in particular EAs) will be silently lost.
  178. var (
  179. restartPos int64
  180. err error
  181. )
  182. sr, readTwice := r.(io.Seeker)
  183. if readTwice {
  184. if restartPos, err = sr.Seek(0, io.SeekCurrent); err != nil {
  185. readTwice = false
  186. }
  187. }
  188. br := winio.NewBackupStreamReader(r)
  189. var dataHdr *winio.BackupHeader
  190. for dataHdr == nil {
  191. bhdr, err := br.Next()
  192. if err == io.EOF { //nolint:errorlint
  193. break
  194. }
  195. if err != nil {
  196. return err
  197. }
  198. switch bhdr.Id {
  199. case winio.BackupData:
  200. hdr.Mode |= cISREG
  201. if !readTwice {
  202. dataHdr = bhdr
  203. }
  204. case winio.BackupSecurity:
  205. sd, err := io.ReadAll(br)
  206. if err != nil {
  207. return err
  208. }
  209. hdr.PAXRecords[hdrRawSecurityDescriptor] = base64.StdEncoding.EncodeToString(sd)
  210. case winio.BackupReparseData:
  211. hdr.Mode |= cISLNK
  212. hdr.Typeflag = tar.TypeSymlink
  213. reparseBuffer, _ := io.ReadAll(br)
  214. rp, err := winio.DecodeReparsePoint(reparseBuffer)
  215. if err != nil {
  216. return err
  217. }
  218. if rp.IsMountPoint {
  219. hdr.PAXRecords[hdrMountPoint] = "1"
  220. }
  221. hdr.Linkname = rp.Target
  222. case winio.BackupEaData:
  223. eab, err := io.ReadAll(br)
  224. if err != nil {
  225. return err
  226. }
  227. eas, err := winio.DecodeExtendedAttributes(eab)
  228. if err != nil {
  229. return err
  230. }
  231. for _, ea := range eas {
  232. // Use base64 encoding for the binary value. Note that there
  233. // is no way to encode the EA's flags, since their use doesn't
  234. // make any sense for persisted EAs.
  235. hdr.PAXRecords[hdrEaPrefix+ea.Name] = base64.StdEncoding.EncodeToString(ea.Value)
  236. }
  237. case winio.BackupAlternateData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
  238. // ignore these streams
  239. default:
  240. return fmt.Errorf("%s: unknown stream ID %d", name, bhdr.Id)
  241. }
  242. }
  243. err = t.WriteHeader(hdr)
  244. if err != nil {
  245. return err
  246. }
  247. if readTwice {
  248. // Get back to the data stream.
  249. if _, err = sr.Seek(restartPos, io.SeekStart); err != nil {
  250. return err
  251. }
  252. for dataHdr == nil {
  253. bhdr, err := br.Next()
  254. if err == io.EOF { //nolint:errorlint
  255. break
  256. }
  257. if err != nil {
  258. return err
  259. }
  260. if bhdr.Id == winio.BackupData {
  261. dataHdr = bhdr
  262. }
  263. }
  264. }
  265. // The logic for copying file contents is fairly complicated due to the need for handling sparse files,
  266. // and the weird ways they are represented by BackupRead. A normal file will always either have a data stream
  267. // with size and content, or no data stream at all (if empty). However, for a sparse file, the content can also
  268. // be represented using a series of sparse block streams following the data stream. Additionally, the way sparse
  269. // files are handled by BackupRead has changed in the OS recently. The specifics of the representation are described
  270. // in the list at the bottom of this block comment.
  271. //
  272. // Sparse files can be represented in four different ways, based on the specifics of the file.
  273. // - Size = 0:
  274. // Previously: BackupRead yields no data stream and no sparse block streams.
  275. // Recently: BackupRead yields a data stream with size = 0. There are no following sparse block streams.
  276. // - Size > 0, no allocated ranges:
  277. // BackupRead yields a data stream with size = 0. Following is a single sparse block stream with
  278. // size = 0 and offset = <file size>.
  279. // - Size > 0, one allocated range:
  280. // BackupRead yields a data stream with size = <file size> containing the file contents. There are no
  281. // sparse block streams. This is the case if you take a normal file with contents and simply set the
  282. // sparse flag on it.
  283. // - Size > 0, multiple allocated ranges:
  284. // BackupRead yields a data stream with size = 0. Following are sparse block streams for each allocated
  285. // range of the file containing the range contents. Finally there is a sparse block stream with
  286. // size = 0 and offset = <file size>.
  287. if dataHdr != nil { //nolint:nestif // todo: reduce nesting complexity
  288. // A data stream was found. Copy the data.
  289. // We assume that we will either have a data stream size > 0 XOR have sparse block streams.
  290. if dataHdr.Size > 0 || (dataHdr.Attributes&winio.StreamSparseAttributes) == 0 {
  291. if size != dataHdr.Size {
  292. return fmt.Errorf("%s: mismatch between file size %d and header size %d", name, size, dataHdr.Size)
  293. }
  294. if _, err = io.Copy(t, br); err != nil {
  295. return fmt.Errorf("%s: copying contents from data stream: %w", name, err)
  296. }
  297. } else if size > 0 {
  298. // As of a recent OS change, BackupRead now returns a data stream for empty sparse files.
  299. // These files have no sparse block streams, so skip the copySparse call if file size = 0.
  300. if err = copySparse(t, br); err != nil {
  301. return fmt.Errorf("%s: copying contents from sparse block stream: %w", name, err)
  302. }
  303. }
  304. }
  305. // Look for streams after the data stream. The only ones we handle are alternate data streams.
  306. // Other streams may have metadata that could be serialized, but the tar header has already
  307. // been written. In practice, this means that we don't get EA or TXF metadata.
  308. for {
  309. bhdr, err := br.Next()
  310. if err == io.EOF { //nolint:errorlint
  311. break
  312. }
  313. if err != nil {
  314. return err
  315. }
  316. switch bhdr.Id {
  317. case winio.BackupAlternateData:
  318. if (bhdr.Attributes & winio.StreamSparseAttributes) != 0 {
  319. // Unsupported for now, since the size of the alternate stream is not present
  320. // in the backup stream until after the data has been read.
  321. return fmt.Errorf("%s: tar of sparse alternate data streams is unsupported", name)
  322. }
  323. altName := strings.TrimSuffix(bhdr.Name, ":$DATA")
  324. hdr = &tar.Header{
  325. Format: hdr.Format,
  326. Name: name + altName,
  327. Mode: hdr.Mode,
  328. Typeflag: tar.TypeReg,
  329. Size: bhdr.Size,
  330. ModTime: hdr.ModTime,
  331. AccessTime: hdr.AccessTime,
  332. ChangeTime: hdr.ChangeTime,
  333. }
  334. err = t.WriteHeader(hdr)
  335. if err != nil {
  336. return err
  337. }
  338. _, err = io.Copy(t, br)
  339. if err != nil {
  340. return err
  341. }
  342. case winio.BackupEaData, winio.BackupLink, winio.BackupPropertyData, winio.BackupObjectId, winio.BackupTxfsData:
  343. // ignore these streams
  344. default:
  345. return fmt.Errorf("%s: unknown stream ID %d after data", name, bhdr.Id)
  346. }
  347. }
  348. return nil
  349. }
  350. // FileInfoFromHeader retrieves basic Win32 file information from a tar header, using the additional metadata written by
  351. // WriteTarFileFromBackupStream.
  352. func FileInfoFromHeader(hdr *tar.Header) (name string, size int64, fileInfo *winio.FileBasicInfo, err error) {
  353. name = hdr.Name
  354. if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
  355. size = hdr.Size
  356. }
  357. fileInfo = &winio.FileBasicInfo{
  358. LastAccessTime: windows.NsecToFiletime(hdr.AccessTime.UnixNano()),
  359. LastWriteTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
  360. ChangeTime: windows.NsecToFiletime(hdr.ChangeTime.UnixNano()),
  361. // Default to ModTime, we'll pull hdrCreationTime below if present
  362. CreationTime: windows.NsecToFiletime(hdr.ModTime.UnixNano()),
  363. }
  364. if attrStr, ok := hdr.PAXRecords[hdrFileAttributes]; ok {
  365. attr, err := strconv.ParseUint(attrStr, 10, 32)
  366. if err != nil {
  367. return "", 0, nil, err
  368. }
  369. fileInfo.FileAttributes = uint32(attr)
  370. } else {
  371. if hdr.Typeflag == tar.TypeDir {
  372. fileInfo.FileAttributes |= syscall.FILE_ATTRIBUTE_DIRECTORY
  373. }
  374. }
  375. if creationTimeStr, ok := hdr.PAXRecords[hdrCreationTime]; ok {
  376. creationTime, err := parsePAXTime(creationTimeStr)
  377. if err != nil {
  378. return "", 0, nil, err
  379. }
  380. fileInfo.CreationTime = windows.NsecToFiletime(creationTime.UnixNano())
  381. }
  382. return name, size, fileInfo, err
  383. }
  384. // WriteBackupStreamFromTarFile writes a Win32 backup stream from the current tar file. Since this function may process multiple
  385. // tar file entries in order to collect all the alternate data streams for the file, it returns the next
  386. // tar file that was not processed, or io.EOF is there are no more.
  387. func WriteBackupStreamFromTarFile(w io.Writer, t *tar.Reader, hdr *tar.Header) (*tar.Header, error) {
  388. bw := winio.NewBackupStreamWriter(w)
  389. sd, err := SecurityDescriptorFromTarHeader(hdr)
  390. if err != nil {
  391. return nil, err
  392. }
  393. if len(sd) != 0 {
  394. bhdr := winio.BackupHeader{
  395. Id: winio.BackupSecurity,
  396. Size: int64(len(sd)),
  397. }
  398. err := bw.WriteHeader(&bhdr)
  399. if err != nil {
  400. return nil, err
  401. }
  402. _, err = bw.Write(sd)
  403. if err != nil {
  404. return nil, err
  405. }
  406. }
  407. eadata, err := ExtendedAttributesFromTarHeader(hdr)
  408. if err != nil {
  409. return nil, err
  410. }
  411. if len(eadata) != 0 {
  412. bhdr := winio.BackupHeader{
  413. Id: winio.BackupEaData,
  414. Size: int64(len(eadata)),
  415. }
  416. err = bw.WriteHeader(&bhdr)
  417. if err != nil {
  418. return nil, err
  419. }
  420. _, err = bw.Write(eadata)
  421. if err != nil {
  422. return nil, err
  423. }
  424. }
  425. if hdr.Typeflag == tar.TypeSymlink {
  426. reparse := EncodeReparsePointFromTarHeader(hdr)
  427. bhdr := winio.BackupHeader{
  428. Id: winio.BackupReparseData,
  429. Size: int64(len(reparse)),
  430. }
  431. err := bw.WriteHeader(&bhdr)
  432. if err != nil {
  433. return nil, err
  434. }
  435. _, err = bw.Write(reparse)
  436. if err != nil {
  437. return nil, err
  438. }
  439. }
  440. if hdr.Typeflag == tar.TypeReg || hdr.Typeflag == tar.TypeRegA {
  441. bhdr := winio.BackupHeader{
  442. Id: winio.BackupData,
  443. Size: hdr.Size,
  444. }
  445. err := bw.WriteHeader(&bhdr)
  446. if err != nil {
  447. return nil, err
  448. }
  449. _, err = io.Copy(bw, t)
  450. if err != nil {
  451. return nil, err
  452. }
  453. }
  454. // Copy all the alternate data streams and return the next non-ADS header.
  455. for {
  456. ahdr, err := t.Next()
  457. if err != nil {
  458. return nil, err
  459. }
  460. if ahdr.Typeflag != tar.TypeReg || !strings.HasPrefix(ahdr.Name, hdr.Name+":") {
  461. return ahdr, nil
  462. }
  463. bhdr := winio.BackupHeader{
  464. Id: winio.BackupAlternateData,
  465. Size: ahdr.Size,
  466. Name: ahdr.Name[len(hdr.Name):] + ":$DATA",
  467. }
  468. err = bw.WriteHeader(&bhdr)
  469. if err != nil {
  470. return nil, err
  471. }
  472. _, err = io.Copy(bw, t)
  473. if err != nil {
  474. return nil, err
  475. }
  476. }
  477. }