1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
// ErrHeader is returned by parsing routines in this file when a tar
// header block or extended-header field fails validation.
ErrHeader = errors.New("archive/tar: invalid tar header")
// maxNanoSecondIntSize is the number of decimal digits in a nanosecond
// count; parsePAXTime pads or truncates the fractional seconds to this width.
const maxNanoSecondIntSize = 9
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
pad int64 // amount of padding (ignored) after current file entry
curr numBytesReader // reader for current file entry
hdrBuff [blockSize]byte // buffer to use in readHeader
RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
rawBytes *bytes.Buffer // last raw bits; lazily allocated (see Next and RawBytes)
err error // Last error seen
// RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
// This includes the header and padding.
//
// This call resets the current rawbytes buffer, so each call returns only
// the bytes accumulated since the previous call.
//
// Only useful when RawAccounting is enabled; otherwise this returns nil.
func (tr *Reader) RawBytes() []byte {
	if !tr.RawAccounting {
	// Lazily allocate the buffer so a zero-value Reader still works.
	if tr.rawBytes == nil {
		tr.rawBytes = bytes.NewBuffer(nil)
	// if we've read them, then flush them.
	defer tr.rawBytes.Reset()
	return tr.rawBytes.Bytes()
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
	r io.Reader // underlying reader
	nb int64 // number of unread bytes for current file entry
// A sparseFileReader is a numBytesReader for reading sparse file data from a
// tar archive; holes between fragments are synthesized as zeros on Read.
type sparseFileReader struct {
	rfr numBytesReader // Reads the sparse-encoded file data
	sp []sparseEntry // The sparse map for the file
	pos int64 // Keeps track of file position
	total int64 // Total size of the file
// A sparseEntry holds a single entry in a sparse file's sparse map.
//
// Sparse files are represented using a series of sparseEntrys.
// Despite the name, a sparseEntry represents an actual data fragment that
// references data found in the underlying archive stream. All regions not
// covered by a sparseEntry are logically filled with zeros.
//
// For example, if the underlying raw file contains the 8-byte data:
//	var compactData = "abcdefgh"
//
// And the sparse map has the following entries:
//	var sp = []sparseEntry{
//		{offset: 2, numBytes: 5} // Data fragment for [2..7]
//		{offset: 18, numBytes: 3} // Data fragment for [18..21]
//
// Then the content of the resulting sparse file with a "real" size of 25 is:
//	var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type sparseEntry struct {
	offset int64 // Starting position of the fragment
	numBytes int64 // Length of the fragment
// Keywords for GNU sparse files in a PAX extended header.
// Formats 0.0/0.1 store the map in these headers; 1.0 stores the map
// inline before the file data (see readGNUSparseMap1x0).
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
// Keywords for old GNU sparse headers.
// Offsets are byte positions within a 512-byte header block; the first four
// sparse entries live in the main header, the rest in extension blocks.
oldGNUSparseMainHeaderOffset = 386
oldGNUSparseMainHeaderIsExtendedOffset = 482
oldGNUSparseMainHeaderNumEntries = 4
oldGNUSparseExtendedHeaderIsExtendedOffset = 504
oldGNUSparseExtendedHeaderNumEntries = 21
oldGNUSparseOffsetSize = 12
oldGNUSparseNumBytesSize = 12
134 // NewReader creates a new Reader reading from r.
135 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// Next advances to the next entry in the tar archive.
//
// io.EOF is returned at the end of the input.
func (tr *Reader) Next() (*Header, error) {
	if tr.RawAccounting {
		// Lazily allocate the raw-accounting buffer on first use.
		if tr.rawBytes == nil {
			tr.rawBytes = bytes.NewBuffer(nil)
	var extHdrs map[string]string
	// Externally, Next iterates through the tar archive as if it is a series of
	// files. Internally, the tar format often uses fake "files" to add meta
	// data that describes the next file. These meta data "files" should not
	// normally be visible to the outside. As such, this loop iterates through
	// one or more "header files" until it finds a "normal file".
	tr.err = tr.skipUnread()
	hdr = tr.readHeader()
	// Check for PAX/GNU special headers and files.
	switch hdr.Typeflag {
	extHdrs, tr.err = parsePAX(tr)
	continue loop // This is a meta header affecting the next header
	case TypeGNULongName, TypeGNULongLink:
	// GNU long name/link: the entry body holds the real name.
	realname, tr.err = ioutil.ReadAll(tr)
	if tr.RawAccounting {
		if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
	// Convert GNU extensions to use PAX headers.
	extHdrs = make(map[string]string)
	switch hdr.Typeflag {
	case TypeGNULongName:
		extHdrs[paxPath] = p.parseString(realname)
	case TypeGNULongLink:
		extHdrs[paxLinkpath] = p.parseString(realname)
	continue loop // This is a meta header affecting the next header
	mergePAX(hdr, extHdrs)
	// Check for a PAX format sparse file
	sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
	// Current file is a PAX format GNU sparse file.
	// Set the current file reader to a sparse file reader.
	tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
	break loop // This is a file, so stop
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
// be treated as a regular file.
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
	var sparseFormat string
	// Check for sparse format indicators
	major, majorOk := headers[paxGNUSparseMajor]
	minor, minorOk := headers[paxGNUSparseMinor]
	sparseName, sparseNameOk := headers[paxGNUSparseName]
	_, sparseMapOk := headers[paxGNUSparseMap]
	sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
	sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
	// Identify which, if any, sparse format applies from which PAX headers are set
	if majorOk && minorOk {
		sparseFormat = major + "." + minor
	} else if sparseNameOk && sparseMapOk {
	} else if sparseSizeOk {
	// Not a PAX format GNU sparse file.
	// Check for unknown sparse format
	if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
	// Update hdr from GNU sparse PAX headers
	hdr.Name = sparseName
	// The real (expanded) size overrides the on-disk sparse-encoded size.
	realSize, err := strconv.ParseInt(sparseSize, 10, 0)
	return nil, ErrHeader
	} else if sparseRealSizeOk {
	realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
	return nil, ErrHeader
	// Set up the sparse map, according to the particular sparse format in use
	switch sparseFormat {
	sp, err = readGNUSparseMap0x1(headers)
	sp, err = readGNUSparseMap1x0(tr.curr)
// mergePAX merges well known headers according to PAX standard.
// In general headers with the same name as those found
// in the header struct overwrite those found in the header
// struct with higher precision or longer values. Esp. useful
// for name and linkname fields.
func mergePAX(hdr *Header, headers map[string]string) error {
	for k, v := range headers {
		uid, err := strconv.ParseInt(v, 10, 0)
		gid, err := strconv.ParseInt(v, 10, 0)
		t, err := parsePAXTime(v)
		t, err := parsePAXTime(v)
		t, err := parsePAXTime(v)
		size, err := strconv.ParseInt(v, 10, 0)
		hdr.Size = int64(size)
		// Keys with the xattr prefix populate the Xattrs map instead of a field.
		if strings.HasPrefix(k, paxXattr) {
			if hdr.Xattrs == nil {
				hdr.Xattrs = make(map[string]string)
			hdr.Xattrs[k[len(paxXattr):]] = v
// parsePAXTime takes a string of the form %d.%d as described in
// the PAX specification. The fractional part is optional.
func parsePAXTime(t string) (time.Time, error) {
	pos := bytes.IndexByte(buf, '.')
	var seconds, nanoseconds int64
	seconds, err = strconv.ParseInt(t, 10, 0)
	return time.Time{}, err
	seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
	return time.Time{}, err
	nano_buf := string(buf[pos+1:])
	// Pad as needed before converting to a decimal.
	// For example .030 -> .030000000 -> 30000000 nanoseconds
	if len(nano_buf) < maxNanoSecondIntSize {
		nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
	} else if len(nano_buf) > maxNanoSecondIntSize {
		// Truncate extra digits; nanoseconds is the finest resolution kept.
		nano_buf = nano_buf[:maxNanoSecondIntSize]
	nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
	return time.Time{}, err
	ts := time.Unix(seconds, nanoseconds)
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned
func parsePAX(r io.Reader) (map[string]string, error) {
	buf, err := ioutil.ReadAll(r)
	// leaving this function for io.Reader makes it more testable
	if tr, ok := r.(*Reader); ok && tr.RawAccounting {
		if _, err = tr.rawBytes.Write(buf); err != nil {
	// For GNU PAX sparse format 0.0 support.
	// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
	var sparseMap bytes.Buffer
	headers := make(map[string]string)
	// Each record is constructed as
	//	"%d %s=%s\n", length, keyword, value
	key, value, residual, err := parsePAXRecord(sbuf)
	return nil, ErrHeader
	keyStr := string(key)
	if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
		// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
		sparseMap.WriteString(value)
		sparseMap.Write([]byte{','})
	// Normal key. Set the value in the headers map.
	headers[keyStr] = string(value)
	if sparseMap.Len() != 0 {
		// Add sparse info to headers, chopping off the extra comma
		sparseMap.Truncate(sparseMap.Len() - 1)
		headers[paxGNUSparseMap] = sparseMap.String()
// parsePAXRecord parses the input PAX record string into a key-value pair.
// If parsing is successful, it will slice off the currently read record and
// return the remainder as r.
//
// A PAX record is of the following form:
//	"%d %s=%s\n" % (size, key, value)
func parsePAXRecord(s string) (k, v, r string, err error) {
	// The size field ends at the first space.
	sp := strings.IndexByte(s, ' ')
	return "", "", s, ErrHeader
	// Parse the first token as a decimal integer.
	n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
	// n counts the whole record including the size digits, space, and newline;
	// it must be at least 5 ("0 a=\n") and fit within the remaining input.
	if perr != nil || n < 5 || int64(len(s)) < n {
		return "", "", s, ErrHeader
	// Extract everything between the space and the final newline.
	rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
	return "", "", s, ErrHeader
	// The first equals separates the key from the value.
	eq := strings.IndexByte(rec, '=')
	return "", "", s, ErrHeader
	return rec[:eq], rec[eq+1:], rem, nil
// parseString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func (*parser) parseString(b []byte) string {
	// Scan forward to the first NUL (or end of slice).
	for n < len(b) && b[n] != 0 {
	return string(b[0:n])
// parseNumeric parses the input as being encoded in either base-256 or octal.
// This function may return negative numbers.
// If parsing fails or an integer overflow occurs, err will be set.
func (p *parser) parseNumeric(b []byte) int64 {
	// Check for base-256 (binary) format first.
	// If the first bit is set, then all following bits constitute a two's
	// complement encoded number in big-endian byte order.
	if len(b) > 0 && b[0]&0x80 != 0 {
		// Handling negative numbers relies on the following identity:
		// If the number is negative, we use an inversion mask to invert the
		// data bytes and treat the value as an unsigned number.
		var inv byte // 0x00 if positive or zero, 0xff if negative
		for i, c := range b {
			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
			c &= 0x7f // Ignore signal bit in first byte
			p.err = ErrHeader // Integer overflow
			p.err = ErrHeader // Integer overflow
	// Normal case is base-8 (octal) format.
	return p.parseOctal(b)
// parseOctal parses the input as an octal number, tolerating the
// NUL/space padding found in tar header fields.
func (p *parser) parseOctal(b []byte) int64 {
	// Because unused fields are filled with NULs, we need
	// to skip leading NULs. Fields may also be padded with
	// spaces or NULs.
	// So we remove leading and trailing NULs and spaces to
	// get the numeric digits only.
	b = bytes.Trim(b, " \x00")
	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
	dataSkip := tr.numBytes() // Number of data bytes to skip
	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
	tr.curr, tr.pad = nil, 0
	if tr.RawAccounting {
		// With raw accounting the skipped bytes must be captured, not discarded.
		_, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
	// If possible, Seek to the last byte before the end of the data section.
	// Do this because Seek is often lazy about reporting errors; this will mask
	// the fact that the tar stream may be truncated. We can rely on the
	// io.CopyN done shortly afterwards to trigger any IO errors.
	var seekSkipped int64 // Number of bytes skipped via Seek
	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
		// io.Seeker, but calling Seek always returns an error and performs
		// no action. Thus, we try an innocent seek to the current position
		// to see if Seek is really supported.
		pos1, err := sr.Seek(0, os.SEEK_CUR)
		// Seek seems supported, so perform the real Seek.
		pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
		seekSkipped = pos2 - pos1
	var copySkipped int64 // Number of bytes skipped via CopyN
	copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
	// EOF within the data portion (not the padding) means truncation.
	if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
		tr.err = io.ErrUnexpectedEOF
// verifyChecksum compares the checksum recorded in the header block against
// both the unsigned and signed sums computed over it, accepting either.
func (tr *Reader) verifyChecksum(header []byte) bool {
	given := p.parseOctal(header[148:156])
	unsigned, signed := checksum(header)
	return p.err == nil && (given == unsigned || given == signed)
600 // readHeader reads the next block header and assumes that the underlying reader
601 // is already aligned to a block boundary.
603 // The err will be set to io.EOF only when one of the following occurs:
604 // * Exactly 0 bytes are read and EOF is hit.
605 // * Exactly 1 block of zeros is read and EOF is hit.
606 // * At least 2 blocks of zeros are read.
607 func (tr *Reader) readHeader() *Header {
608 header := tr.hdrBuff[:]
609 copy(header, zeroBlock)
611 if n, err := io.ReadFull(tr.r, header); err != nil {
613 // because it could read some of the block, but reach EOF first
614 if tr.err == io.EOF && tr.RawAccounting {
615 if _, err := tr.rawBytes.Write(header[:n]); err != nil {
619 return nil // io.EOF is okay here
621 if tr.RawAccounting {
622 if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
627 // Two blocks of zero bytes marks the end of the archive.
628 if bytes.Equal(header, zeroBlock[0:blockSize]) {
629 if n, err := io.ReadFull(tr.r, header); err != nil {
631 // because it could read some of the block, but reach EOF first
632 if tr.err == io.EOF && tr.RawAccounting {
633 if _, err := tr.rawBytes.Write(header[:n]); err != nil {
637 return nil // io.EOF is okay here
639 if tr.RawAccounting {
640 if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
644 if bytes.Equal(header, zeroBlock[0:blockSize]) {
647 tr.err = ErrHeader // zero block and then non-zero block
652 if !tr.verifyChecksum(header) {
662 hdr.Name = p.parseString(s.next(100))
663 hdr.Mode = p.parseNumeric(s.next(8))
664 hdr.Uid = int(p.parseNumeric(s.next(8)))
665 hdr.Gid = int(p.parseNumeric(s.next(8)))
666 hdr.Size = p.parseNumeric(s.next(12))
667 hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
669 hdr.Typeflag = s.next(1)[0]
670 hdr.Linkname = p.parseString(s.next(100))
672 // The remainder of the header depends on the value of magic.
673 // The original (v7) version of tar had no explicit magic field,
674 // so its magic bytes, like the rest of the block, are NULs.
675 magic := string(s.next(8)) // contains version field as well.
678 case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
679 if string(header[508:512]) == "tar\x00" {
684 case magic == "ustar \x00": // old GNU tar
689 case "posix", "gnu", "star":
690 hdr.Uname = p.parseString(s.next(32))
691 hdr.Gname = p.parseString(s.next(32))
692 devmajor := s.next(8)
693 devminor := s.next(8)
694 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
695 hdr.Devmajor = p.parseNumeric(devmajor)
696 hdr.Devminor = p.parseNumeric(devminor)
701 prefix = p.parseString(s.next(155))
703 prefix = p.parseString(s.next(131))
704 hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
705 hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
708 hdr.Name = prefix + "/" + hdr.Name
718 if isHeaderOnlyType(hdr.Typeflag) {
726 // Set the current file reader.
727 tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
728 tr.curr = ®FileReader{r: tr.r, nb: nb}
730 // Check for old GNU sparse format entry.
731 if hdr.Typeflag == TypeGNUSparse {
732 // Get the real size of the file.
733 hdr.Size = p.parseNumeric(header[483:495])
739 // Read the sparse map.
740 sp := tr.readOldGNUSparseMap(header)
745 // Current file is a GNU sparse file. Update the current file reader.
746 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
	isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
	spCap := oldGNUSparseMainHeaderNumEntries
	spCap += oldGNUSparseExtendedHeaderNumEntries
	sp := make([]sparseEntry, 0, spCap)
	s := slicer(header[oldGNUSparseMainHeaderOffset:])
	// Read the four entries from the main tar header
	for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
		offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
		numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
		// An all-zero entry terminates the map.
		if offset == 0 && numBytes == 0 {
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
	// There are more entries. Read an extension header and parse its entries.
	sparseHeader := make([]byte, blockSize)
	if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
	if tr.RawAccounting {
		if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
	isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
	s = slicer(sparseHeader)
	for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
		offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
		numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
		// An all-zero entry terminates the map.
		if offset == 0 && numBytes == 0 {
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
// version 1.0. The format of the sparse map consists of a series of
// newline-terminated numeric fields. The first field is the number of entries
// and is always present. Following this are the entries, consisting of two
// fields (offset, numBytes). This function must stop reading at the end
// boundary of the block containing the last newline.
//
// Note that the GNU manual says that numeric values should be encoded in octal
// format. However, the GNU tar utility itself outputs these values in decimal.
// As such, this library treats values as being encoded in decimal.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
	var blk = make([]byte, blockSize)
	// feedTokens copies data in numBlock chunks from r into buf until there are
	// at least cnt newlines in buf. It will not read more blocks than needed.
	var feedTokens = func(cnt int64) error {
		for cntNewline < cnt {
			if _, err := io.ReadFull(r, blk); err != nil {
				err = io.ErrUnexpectedEOF
			for _, c := range blk {
	// nextToken gets the next token delimited by a newline. This assumes that
	// at least one newline exists in the buffer.
	var nextToken = func() string {
		tok, _ := buf.ReadString('\n')
		return tok[:len(tok)-1] // Cut off newline
	// Parse for the number of entries.
	// Use integer overflow resistant math to check this.
	if err := feedTokens(1); err != nil {
	numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
		return nil, ErrHeader
	// Parse for all member entries.
	// numEntries is trusted after this since a potential attacker must have
	// committed resources proportional to what this library used.
	if err := feedTokens(2 * numEntries); err != nil {
	sp := make([]sparseEntry, 0, numEntries)
	for i := int64(0); i < numEntries; i++ {
		offset, err := strconv.ParseInt(nextToken(), 10, 64)
			return nil, ErrHeader
		numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
			return nil, ErrHeader
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
// version 0.1. The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
	// Get number of entries.
	// Use integer overflow resistant math to check this.
	numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
	numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
	if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
		return nil, ErrHeader
	// There should be two numbers in sparseMap for each entry.
	sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
	if int64(len(sparseMap)) != 2*numEntries {
		return nil, ErrHeader
	// Loop through the entries in the sparse map.
	// numEntries is trusted now.
	sp := make([]sparseEntry, 0, numEntries)
	for i := int64(0); i < numEntries; i++ {
		offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
			return nil, ErrHeader
		numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
			return nil, ErrHeader
		sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
// numBytes returns the number of bytes left to read in the current file's entry
// in the tar archive, or 0 if there is no current file.
func (tr *Reader) numBytes() int64 {
	// No current file, so no bytes
	return tr.curr.numBytes()
// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
//
// Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
// the Header.Size claims.
func (tr *Reader) Read(b []byte) (n int, err error) {
	// Delegate to the per-entry reader; remember non-EOF errors.
	n, err = tr.curr.Read(b)
	if err != nil && err != io.EOF {
// Read reads from the file entry, never past the entry's declared size,
// and converts a premature io.EOF into io.ErrUnexpectedEOF.
func (rfr *regFileReader) Read(b []byte) (n int, err error) {
	// Clamp the read so we never consume bytes belonging to the next entry.
	if int64(len(b)) > rfr.nb {
	n, err = rfr.r.Read(b)
	// EOF with unread bytes remaining means the archive was truncated.
	if err == io.EOF && rfr.nb > 0 {
		err = io.ErrUnexpectedEOF
// numBytes returns the number of bytes left to read in the file's data in the tar archive.
func (rfr *regFileReader) numBytes() int64 {
// newSparseFileReader creates a new sparseFileReader, but validates all of the
// sparse entries before doing so.
func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
	return nil, ErrHeader // Total size cannot be negative
	// Validate all sparse entries. These are the same checks as performed by
	// the BSD tar utility.
	for i, s := range sp {
	case s.offset < 0 || s.numBytes < 0:
		return nil, ErrHeader // Negative values are never okay
	case s.offset > math.MaxInt64-s.numBytes:
		return nil, ErrHeader // Integer overflow with large length
	case s.offset+s.numBytes > total:
		return nil, ErrHeader // Region extends beyond the "real" size
	case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
		return nil, ErrHeader // Regions can't overlap and must be in order
	return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
// readHole reads a sparse hole ending at endOffset, filling b with zeros
// and returning the number of zero bytes written.
func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
	n64 := endOffset - sfr.pos
	if n64 > int64(len(b)) {
	// Fill the destination with zeros for the hole region.
	for i := 0; i < n; i++ {
// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
	// Skip past all empty fragments.
	for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
	// If there are no more fragments, then it is possible that there
	// is one last sparse hole.
	if len(sfr.sp) == 0 {
		// This behavior matches the BSD tar utility.
		// However, GNU tar stops returning data even if sfr.total is unmet.
		if sfr.pos < sfr.total {
			return sfr.readHole(b, sfr.total), nil
	// In front of a data fragment, so read a hole.
	if sfr.pos < sfr.sp[0].offset {
		return sfr.readHole(b, sfr.sp[0].offset), nil
	// In a data fragment, so read from it.
	// This math is overflow free since we verify that offset and numBytes can
	// be safely added when creating the sparseFileReader.
	endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
	bytesLeft := endPos - sfr.pos // Bytes left in fragment
	if int64(len(b)) > bytesLeft {
	n, err = sfr.rfr.Read(b)
	if sfr.pos < endPos {
		err = io.ErrUnexpectedEOF // There was supposed to be more data
	} else if sfr.pos < sfr.total {
		err = nil // There is still an implicit sparse hole at the end
	if sfr.pos == endPos {
		sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
	return sfr.rfr.numBytes()