vendor/github.com/vbatts/tar-split/archive/tar/reader.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package tar
   6
   7 // TODO(dsymonds):
   8 //   - pax extensions
   9
  10 import (
  11         "bytes"
  12         "errors"
  13         "io"
  14         "io/ioutil"
  15         "math"
  16         "os"
  17         "strconv"
  18         "strings"
  19         "time"
  20 )
  21
  22 var (
  23         ErrHeader = errors.New("archive/tar: invalid tar header")
  24 )
  25
  26 const maxNanoSecondIntSize = 9
  27
  28 // A Reader provides sequential access to the contents of a tar archive.
  29 // A tar archive consists of a sequence of files.
  30 // The Next method advances to the next file in the archive (including the first),
  31 // and then it can be treated as an io.Reader to access the file's data.
  32 type Reader struct {
  33         r       io.Reader
  34         err     error
  35         pad     int64           // amount of padding (ignored) after current file entry
  36         curr    numBytesReader  // reader for current file entry
  37         hdrBuff [blockSize]byte // buffer to use in readHeader
  38
  39         RawAccounting bool          // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
  40         rawBytes      *bytes.Buffer // last raw bits
  41 }
  42
  43 type parser struct {
  44         err error // Last error seen
  45 }
  46
  47 // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
  48 // This includes the header and padding.
  49 //
  50 // This call resets the current rawbytes buffer
  51 //
  52 // Only when RawAccounting is enabled, otherwise this returns nil
  53 func (tr *Reader) RawBytes() []byte {
  54         if !tr.RawAccounting {
  55                 return nil
  56         }
  57         if tr.rawBytes == nil {
  58                 tr.rawBytes = bytes.NewBuffer(nil)
  59         }
  60         // if we've read them, then flush them.
  61         defer tr.rawBytes.Reset()
  62         return tr.rawBytes.Bytes()
  63 }
  64
  65 // A numBytesReader is an io.Reader with a numBytes method, returning the number
  66 // of bytes remaining in the underlying encoded data.
  67 type numBytesReader interface {
  68         io.Reader
  69         numBytes() int64
  70 }
  71
  72 // A regFileReader is a numBytesReader for reading file data from a tar archive.
  73 type regFileReader struct {
  74         r  io.Reader // underlying reader
  75         nb int64     // number of unread bytes for current file entry
  76 }
  77
  78 // A sparseFileReader is a numBytesReader for reading sparse file data from a
  79 // tar archive.
  80 type sparseFileReader struct {
  81         rfr   numBytesReader // Reads the sparse-encoded file data
  82         sp    []sparseEntry  // The sparse map for the file
  83         pos   int64          // Keeps track of file position
  84         total int64          // Total size of the file
  85 }
  86
  87 // A sparseEntry holds a single entry in a sparse file's sparse map.
  88 //
  89 // Sparse files are represented using a series of sparseEntrys.
  90 // Despite the name, a sparseEntry represents an actual data fragment that
  91 // references data found in the underlying archive stream. All regions not
  92 // covered by a sparseEntry are logically filled with zeros.
  93 //
  94 // For example, if the underlying raw file contains the 10-byte data:
  95 //      var compactData = "abcdefgh"
  96 //
  97 // And the sparse map has the following entries:
  98 //      var sp = []sparseEntry{
  99 //              {offset: 2,  numBytes: 5} // Data fragment for [2..7]
 100 //              {offset: 18, numBytes: 3} // Data fragment for [18..21]
 101 //      }
 102 //
 103 // Then the content of the resulting sparse file with a "real" size of 25 is:
 104 //      var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
 105 type sparseEntry struct {
 106         offset   int64 // Starting position of the fragment
 107         numBytes int64 // Length of the fragment
 108 }
 109
 110 // Keywords for GNU sparse files in a PAX extended header
 111 const (
 112         paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
 113         paxGNUSparseOffset    = "GNU.sparse.offset"
 114         paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
 115         paxGNUSparseMap       = "GNU.sparse.map"
 116         paxGNUSparseName      = "GNU.sparse.name"
 117         paxGNUSparseMajor     = "GNU.sparse.major"
 118         paxGNUSparseMinor     = "GNU.sparse.minor"
 119         paxGNUSparseSize      = "GNU.sparse.size"
 120         paxGNUSparseRealSize  = "GNU.sparse.realsize"
 121 )
 122
 123 // Keywords for old GNU sparse headers
 124 const (
 125         oldGNUSparseMainHeaderOffset               = 386
 126         oldGNUSparseMainHeaderIsExtendedOffset     = 482
 127         oldGNUSparseMainHeaderNumEntries           = 4
 128         oldGNUSparseExtendedHeaderIsExtendedOffset = 504
 129         oldGNUSparseExtendedHeaderNumEntries       = 21
 130         oldGNUSparseOffsetSize                     = 12
 131         oldGNUSparseNumBytesSize                   = 12
 132 )
 133
 134 // NewReader creates a new Reader reading from r.
 135 func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
 136
 137 // Next advances to the next entry in the tar archive.
 138 //
 139 // io.EOF is returned at the end of the input.
 140 func (tr *Reader) Next() (*Header, error) {
 141         if tr.RawAccounting {
 142                 if tr.rawBytes == nil {
 143                         tr.rawBytes = bytes.NewBuffer(nil)
 144                 } else {
 145                         tr.rawBytes.Reset()
 146                 }
 147         }
 148
 149         if tr.err != nil {
 150                 return nil, tr.err
 151         }
 152
 153         var hdr *Header
 154         var extHdrs map[string]string
 155
 156         // Externally, Next iterates through the tar archive as if it is a series of
 157         // files. Internally, the tar format often uses fake "files" to add meta
 158         // data that describes the next file. These meta data "files" should not
 159         // normally be visible to the outside. As such, this loop iterates through
 160         // one or more "header files" until it finds a "normal file".
 161 loop:
 162         for {
 163                 tr.err = tr.skipUnread()
 164                 if tr.err != nil {
 165                         return nil, tr.err
 166                 }
 167
 168                 hdr = tr.readHeader()
 169                 if tr.err != nil {
 170                         return nil, tr.err
 171                 }
 172                 // Check for PAX/GNU special headers and files.
 173                 switch hdr.Typeflag {
 174                 case TypeXHeader:
 175                         extHdrs, tr.err = parsePAX(tr)
 176                         if tr.err != nil {
 177                                 return nil, tr.err
 178                         }
 179                         continue loop // This is a meta header affecting the next header
 180                 case TypeGNULongName, TypeGNULongLink:
 181                         var realname []byte
 182                         realname, tr.err = ioutil.ReadAll(tr)
 183                         if tr.err != nil {
 184                                 return nil, tr.err
 185                         }
 186
 187                         if tr.RawAccounting {
 188                                 if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil {
 189                                         return nil, tr.err
 190                                 }
 191                         }
 192
 193                         // Convert GNU extensions to use PAX headers.
 194                         if extHdrs == nil {
 195                                 extHdrs = make(map[string]string)
 196                         }
 197                         var p parser
 198                         switch hdr.Typeflag {
 199                         case TypeGNULongName:
 200                                 extHdrs[paxPath] = p.parseString(realname)
 201                         case TypeGNULongLink:
 202                                 extHdrs[paxLinkpath] = p.parseString(realname)
 203                         }
 204                         if p.err != nil {
 205                                 tr.err = p.err
 206                                 return nil, tr.err
 207                         }
 208                         continue loop // This is a meta header affecting the next header
 209                 default:
 210                         mergePAX(hdr, extHdrs)
 211
 212                         // Check for a PAX format sparse file
 213                         sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs)
 214                         if err != nil {
 215                                 tr.err = err
 216                                 return nil, err
 217                         }
 218                         if sp != nil {
 219                                 // Current file is a PAX format GNU sparse file.
 220                                 // Set the current file reader to a sparse file reader.
 221                                 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
 222                                 if tr.err != nil {
 223                                         return nil, tr.err
 224                                 }
 225                         }
 226                         break loop // This is a file, so stop
 227                 }
 228         }
 229         return hdr, nil
 230 }
 231
 232 // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
 233 // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
 234 // be treated as a regular file.
 235 func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
 236         var sparseFormat string
 237
 238         // Check for sparse format indicators
 239         major, majorOk := headers[paxGNUSparseMajor]
 240         minor, minorOk := headers[paxGNUSparseMinor]
 241         sparseName, sparseNameOk := headers[paxGNUSparseName]
 242         _, sparseMapOk := headers[paxGNUSparseMap]
 243         sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
 244         sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
 245
 246         // Identify which, if any, sparse format applies from which PAX headers are set
 247         if majorOk && minorOk {
 248                 sparseFormat = major + "." + minor
 249         } else if sparseNameOk && sparseMapOk {
 250                 sparseFormat = "0.1"
 251         } else if sparseSizeOk {
 252                 sparseFormat = "0.0"
 253         } else {
 254                 // Not a PAX format GNU sparse file.
 255                 return nil, nil
 256         }
 257
 258         // Check for unknown sparse format
 259         if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
 260                 return nil, nil
 261         }
 262
 263         // Update hdr from GNU sparse PAX headers
 264         if sparseNameOk {
 265                 hdr.Name = sparseName
 266         }
 267         if sparseSizeOk {
 268                 realSize, err := strconv.ParseInt(sparseSize, 10, 0)
 269                 if err != nil {
 270                         return nil, ErrHeader
 271                 }
 272                 hdr.Size = realSize
 273         } else if sparseRealSizeOk {
 274                 realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
 275                 if err != nil {
 276                         return nil, ErrHeader
 277                 }
 278                 hdr.Size = realSize
 279         }
 280
 281         // Set up the sparse map, according to the particular sparse format in use
 282         var sp []sparseEntry
 283         var err error
 284         switch sparseFormat {
 285         case "0.0", "0.1":
 286                 sp, err = readGNUSparseMap0x1(headers)
 287         case "1.0":
 288                 sp, err = readGNUSparseMap1x0(tr.curr)
 289         }
 290         return sp, err
 291 }
 292
 293 // mergePAX merges well known headers according to PAX standard.
 294 // In general headers with the same name as those found
 295 // in the header struct overwrite those found in the header
 296 // struct with higher precision or longer values. Esp. useful
 297 // for name and linkname fields.
 298 func mergePAX(hdr *Header, headers map[string]string) error {
 299         for k, v := range headers {
 300                 switch k {
 301                 case paxPath:
 302                         hdr.Name = v
 303                 case paxLinkpath:
 304                         hdr.Linkname = v
 305                 case paxGname:
 306                         hdr.Gname = v
 307                 case paxUname:
 308                         hdr.Uname = v
 309                 case paxUid:
 310                         uid, err := strconv.ParseInt(v, 10, 0)
 311                         if err != nil {
 312                                 return err
 313                         }
 314                         hdr.Uid = int(uid)
 315                 case paxGid:
 316                         gid, err := strconv.ParseInt(v, 10, 0)
 317                         if err != nil {
 318                                 return err
 319                         }
 320                         hdr.Gid = int(gid)
 321                 case paxAtime:
 322                         t, err := parsePAXTime(v)
 323                         if err != nil {
 324                                 return err
 325                         }
 326                         hdr.AccessTime = t
 327                 case paxMtime:
 328                         t, err := parsePAXTime(v)
 329                         if err != nil {
 330                                 return err
 331                         }
 332                         hdr.ModTime = t
 333                 case paxCtime:
 334                         t, err := parsePAXTime(v)
 335                         if err != nil {
 336                                 return err
 337                         }
 338                         hdr.ChangeTime = t
 339                 case paxSize:
 340                         size, err := strconv.ParseInt(v, 10, 0)
 341                         if err != nil {
 342                                 return err
 343                         }
 344                         hdr.Size = int64(size)
 345                 default:
 346                         if strings.HasPrefix(k, paxXattr) {
 347                                 if hdr.Xattrs == nil {
 348                                         hdr.Xattrs = make(map[string]string)
 349                                 }
 350                                 hdr.Xattrs[k[len(paxXattr):]] = v
 351                         }
 352                 }
 353         }
 354         return nil
 355 }
 356
 357 // parsePAXTime takes a string of the form %d.%d as described in
 358 // the PAX specification.
 359 func parsePAXTime(t string) (time.Time, error) {
 360         buf := []byte(t)
 361         pos := bytes.IndexByte(buf, '.')
 362         var seconds, nanoseconds int64
 363         var err error
 364         if pos == -1 {
 365                 seconds, err = strconv.ParseInt(t, 10, 0)
 366                 if err != nil {
 367                         return time.Time{}, err
 368                 }
 369         } else {
 370                 seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
 371                 if err != nil {
 372                         return time.Time{}, err
 373                 }
 374                 nano_buf := string(buf[pos+1:])
 375                 // Pad as needed before converting to a decimal.
 376                 // For example .030 -> .030000000 -> 30000000 nanoseconds
 377                 if len(nano_buf) < maxNanoSecondIntSize {
 378                         // Right pad
 379                         nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
 380                 } else if len(nano_buf) > maxNanoSecondIntSize {
 381                         // Right truncate
 382                         nano_buf = nano_buf[:maxNanoSecondIntSize]
 383                 }
 384                 nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
 385                 if err != nil {
 386                         return time.Time{}, err
 387                 }
 388         }
 389         ts := time.Unix(seconds, nanoseconds)
 390         return ts, nil
 391 }
 392
 393 // parsePAX parses PAX headers.
 394 // If an extended header (type 'x') is invalid, ErrHeader is returned
 395 func parsePAX(r io.Reader) (map[string]string, error) {
 396         buf, err := ioutil.ReadAll(r)
 397         if err != nil {
 398                 return nil, err
 399         }
 400         // leaving this function for io.Reader makes it more testable
 401         if tr, ok := r.(*Reader); ok && tr.RawAccounting {
 402                 if _, err = tr.rawBytes.Write(buf); err != nil {
 403                         return nil, err
 404                 }
 405         }
 406         sbuf := string(buf)
 407
 408         // For GNU PAX sparse format 0.0 support.
 409         // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
 410         var sparseMap bytes.Buffer
 411
 412         headers := make(map[string]string)
 413         // Each record is constructed as
 414         //     "%d %s=%s\n", length, keyword, value
 415         for len(sbuf) > 0 {
 416                 key, value, residual, err := parsePAXRecord(sbuf)
 417                 if err != nil {
 418                         return nil, ErrHeader
 419                 }
 420                 sbuf = residual
 421
 422                 keyStr := string(key)
 423                 if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
 424                         // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
 425                         sparseMap.WriteString(value)
 426                         sparseMap.Write([]byte{','})
 427                 } else {
 428                         // Normal key. Set the value in the headers map.
 429                         headers[keyStr] = string(value)
 430                 }
 431         }
 432         if sparseMap.Len() != 0 {
 433                 // Add sparse info to headers, chopping off the extra comma
 434                 sparseMap.Truncate(sparseMap.Len() - 1)
 435                 headers[paxGNUSparseMap] = sparseMap.String()
 436         }
 437         return headers, nil
 438 }
 439
 440 // parsePAXRecord parses the input PAX record string into a key-value pair.
 441 // If parsing is successful, it will slice off the currently read record and
 442 // return the remainder as r.
 443 //
 444 // A PAX record is of the following form:
 445 //      "%d %s=%s\n" % (size, key, value)
 446 func parsePAXRecord(s string) (k, v, r string, err error) {
 447         // The size field ends at the first space.
 448         sp := strings.IndexByte(s, ' ')
 449         if sp == -1 {
 450                 return "", "", s, ErrHeader
 451         }
 452
 453         // Parse the first token as a decimal integer.
 454         n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int
 455         if perr != nil || n < 5 || int64(len(s)) < n {
 456                 return "", "", s, ErrHeader
 457         }
 458
 459         // Extract everything between the space and the final newline.
 460         rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:]
 461         if nl != "\n" {
 462                 return "", "", s, ErrHeader
 463         }
 464
 465         // The first equals separates the key from the value.
 466         eq := strings.IndexByte(rec, '=')
 467         if eq == -1 {
 468                 return "", "", s, ErrHeader
 469         }
 470         return rec[:eq], rec[eq+1:], rem, nil
 471 }
 472
 473 // parseString parses bytes as a NUL-terminated C-style string.
 474 // If a NUL byte is not found then the whole slice is returned as a string.
 475 func (*parser) parseString(b []byte) string {
 476         n := 0
 477         for n < len(b) && b[n] != 0 {
 478                 n++
 479         }
 480         return string(b[0:n])
 481 }
 482
 483 // parseNumeric parses the input as being encoded in either base-256 or octal.
 484 // This function may return negative numbers.
 485 // If parsing fails or an integer overflow occurs, err will be set.
 486 func (p *parser) parseNumeric(b []byte) int64 {
 487         // Check for base-256 (binary) format first.
 488         // If the first bit is set, then all following bits constitute a two's
 489         // complement encoded number in big-endian byte order.
 490         if len(b) > 0 && b[0]&0x80 != 0 {
 491                 // Handling negative numbers relies on the following identity:
 492                 //      -a-1 == ^a
 493                 //
 494                 // If the number is negative, we use an inversion mask to invert the
 495                 // data bytes and treat the value as an unsigned number.
 496                 var inv byte // 0x00 if positive or zero, 0xff if negative
 497                 if b[0]&0x40 != 0 {
 498                         inv = 0xff
 499                 }
 500
 501                 var x uint64
 502                 for i, c := range b {
 503                         c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
 504                         if i == 0 {
 505                                 c &= 0x7f // Ignore signal bit in first byte
 506                         }
 507                         if (x >> 56) > 0 {
 508                                 p.err = ErrHeader // Integer overflow
 509                                 return 0
 510                         }
 511                         x = x<<8 | uint64(c)
 512                 }
 513                 if (x >> 63) > 0 {
 514                         p.err = ErrHeader // Integer overflow
 515                         return 0
 516                 }
 517                 if inv == 0xff {
 518                         return ^int64(x)
 519                 }
 520                 return int64(x)
 521         }
 522
 523         // Normal case is base-8 (octal) format.
 524         return p.parseOctal(b)
 525 }
 526
 527 func (p *parser) parseOctal(b []byte) int64 {
 528         // Because unused fields are filled with NULs, we need
 529         // to skip leading NULs. Fields may also be padded with
 530         // spaces or NULs.
 531         // So we remove leading and trailing NULs and spaces to
 532         // be sure.
 533         b = bytes.Trim(b, " \x00")
 534
 535         if len(b) == 0 {
 536                 return 0
 537         }
 538         x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
 539         if perr != nil {
 540                 p.err = ErrHeader
 541         }
 542         return int64(x)
 543 }
 544
 545 // skipUnread skips any unread bytes in the existing file entry, as well as any
 546 // alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
 547 // encountered in the data portion; it is okay to hit io.EOF in the padding.
 548 //
 549 // Note that this function still works properly even when sparse files are being
 550 // used since numBytes returns the bytes remaining in the underlying io.Reader.
 551 func (tr *Reader) skipUnread() error {
 552         dataSkip := tr.numBytes()      // Number of data bytes to skip
 553         totalSkip := dataSkip + tr.pad // Total number of bytes to skip
 554         tr.curr, tr.pad = nil, 0
 555         if tr.RawAccounting {
 556                 _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip)
 557                 return tr.err
 558         }
 559         // If possible, Seek to the last byte before the end of the data section.
 560         // Do this because Seek is often lazy about reporting errors; this will mask
 561         // the fact that the tar stream may be truncated. We can rely on the
 562         // io.CopyN done shortly afterwards to trigger any IO errors.
 563         var seekSkipped int64 // Number of bytes skipped via Seek
 564         if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
 565                 // Not all io.Seeker can actually Seek. For example, os.Stdin implements
 566                 // io.Seeker, but calling Seek always returns an error and performs
 567                 // no action. Thus, we try an innocent seek to the current position
 568                 // to see if Seek is really supported.
 569                 pos1, err := sr.Seek(0, os.SEEK_CUR)
 570                 if err == nil {
 571                         // Seek seems supported, so perform the real Seek.
 572                         pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
 573                         if err != nil {
 574                                 tr.err = err
 575                                 return tr.err
 576                         }
 577                         seekSkipped = pos2 - pos1
 578                 }
 579         }
 580
 581         var copySkipped int64 // Number of bytes skipped via CopyN
 582         copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
 583         if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
 584                 tr.err = io.ErrUnexpectedEOF
 585         }
 586         return tr.err
 587 }
 588
 589 func (tr *Reader) verifyChecksum(header []byte) bool {
 590         if tr.err != nil {
 591                 return false
 592         }
 593
 594         var p parser
 595         given := p.parseOctal(header[148:156])
 596         unsigned, signed := checksum(header)
 597         return p.err == nil && (given == unsigned || given == signed)
 598 }
 599
 600 // readHeader reads the next block header and assumes that the underlying reader
 601 // is already aligned to a block boundary.
 602 //
 603 // The err will be set to io.EOF only when one of the following occurs:
 604 //      * Exactly 0 bytes are read and EOF is hit.
 605 //      * Exactly 1 block of zeros is read and EOF is hit.
 606 //      * At least 2 blocks of zeros are read.
 607 func (tr *Reader) readHeader() *Header {
 608         header := tr.hdrBuff[:]
 609         copy(header, zeroBlock)
 610
 611         if n, err := io.ReadFull(tr.r, header); err != nil {
 612                 tr.err = err
 613                 // because it could read some of the block, but reach EOF first
 614                 if tr.err == io.EOF && tr.RawAccounting {
 615                         if _, err := tr.rawBytes.Write(header[:n]); err != nil {
 616                                 tr.err = err
 617                         }
 618                 }
 619                 return nil // io.EOF is okay here
 620         }
 621         if tr.RawAccounting {
 622                 if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
 623                         return nil
 624                 }
 625         }
 626
 627         // Two blocks of zero bytes marks the end of the archive.
 628         if bytes.Equal(header, zeroBlock[0:blockSize]) {
 629                 if n, err := io.ReadFull(tr.r, header); err != nil {
 630                         tr.err = err
 631                         // because it could read some of the block, but reach EOF first
 632                         if tr.err == io.EOF && tr.RawAccounting {
 633                                 if _, err := tr.rawBytes.Write(header[:n]); err != nil {
 634                                         tr.err = err
 635                                 }
 636                         }
 637                         return nil // io.EOF is okay here
 638                 }
 639                 if tr.RawAccounting {
 640                         if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
 641                                 return nil
 642                         }
 643                 }
 644                 if bytes.Equal(header, zeroBlock[0:blockSize]) {
 645                         tr.err = io.EOF
 646                 } else {
 647                         tr.err = ErrHeader // zero block and then non-zero block
 648                 }
 649                 return nil
 650         }
 651
 652         if !tr.verifyChecksum(header) {
 653                 tr.err = ErrHeader
 654                 return nil
 655         }
 656
 657         // Unpack
 658         var p parser
 659         hdr := new(Header)
 660         s := slicer(header)
 661
 662         hdr.Name = p.parseString(s.next(100))
 663         hdr.Mode = p.parseNumeric(s.next(8))
 664         hdr.Uid = int(p.parseNumeric(s.next(8)))
 665         hdr.Gid = int(p.parseNumeric(s.next(8)))
 666         hdr.Size = p.parseNumeric(s.next(12))
 667         hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0)
 668         s.next(8) // chksum
 669         hdr.Typeflag = s.next(1)[0]
 670         hdr.Linkname = p.parseString(s.next(100))
 671
 672         // The remainder of the header depends on the value of magic.
 673         // The original (v7) version of tar had no explicit magic field,
 674         // so its magic bytes, like the rest of the block, are NULs.
 675         magic := string(s.next(8)) // contains version field as well.
 676         var format string
 677         switch {
 678         case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
 679                 if string(header[508:512]) == "tar\x00" {
 680                         format = "star"
 681                 } else {
 682                         format = "posix"
 683                 }
 684         case magic == "ustar  \x00": // old GNU tar
 685                 format = "gnu"
 686         }
 687
 688         switch format {
 689         case "posix", "gnu", "star":
 690                 hdr.Uname = p.parseString(s.next(32))
 691                 hdr.Gname = p.parseString(s.next(32))
 692                 devmajor := s.next(8)
 693                 devminor := s.next(8)
 694                 if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
 695                         hdr.Devmajor = p.parseNumeric(devmajor)
 696                         hdr.Devminor = p.parseNumeric(devminor)
 697                 }
 698                 var prefix string
 699                 switch format {
 700                 case "posix", "gnu":
 701                         prefix = p.parseString(s.next(155))
 702                 case "star":
 703                         prefix = p.parseString(s.next(131))
 704                         hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0)
 705                         hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0)
 706                 }
 707                 if len(prefix) > 0 {
 708                         hdr.Name = prefix + "/" + hdr.Name
 709                 }
 710         }
 711
 712         if p.err != nil {
 713                 tr.err = p.err
 714                 return nil
 715         }
 716
 717         nb := hdr.Size
 718         if isHeaderOnlyType(hdr.Typeflag) {
 719                 nb = 0
 720         }
 721         if nb < 0 {
 722                 tr.err = ErrHeader
 723                 return nil
 724         }
 725
 726         // Set the current file reader.
 727         tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
 728         tr.curr = &regFileReader{r: tr.r, nb: nb}
 729
 730         // Check for old GNU sparse format entry.
 731         if hdr.Typeflag == TypeGNUSparse {
 732                 // Get the real size of the file.
 733                 hdr.Size = p.parseNumeric(header[483:495])
 734                 if p.err != nil {
 735                         tr.err = p.err
 736                         return nil
 737                 }
 738
 739                 // Read the sparse map.
 740                 sp := tr.readOldGNUSparseMap(header)
 741                 if tr.err != nil {
 742                         return nil
 743                 }
 744
 745                 // Current file is a GNU sparse file. Update the current file reader.
 746                 tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size)
 747                 if tr.err != nil {
 748                         return nil
 749                 }
 750         }
 751
 752         return hdr
 753 }
 754
 755 // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
 756 // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
 757 // then one or more extension headers are used to store the rest of the sparse map.
 758 func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
 759         var p parser
 760         isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
 761         spCap := oldGNUSparseMainHeaderNumEntries
 762         if isExtended {
 763                 spCap += oldGNUSparseExtendedHeaderNumEntries
 764         }
 765         sp := make([]sparseEntry, 0, spCap)
 766         s := slicer(header[oldGNUSparseMainHeaderOffset:])
 767
 768         // Read the four entries from the main tar header
 769         for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
 770                 offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
 771                 numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
 772                 if p.err != nil {
 773                         tr.err = p.err
 774                         return nil
 775                 }
 776                 if offset == 0 && numBytes == 0 {
 777                         break
 778                 }
 779                 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 780         }
 781
 782         for isExtended {
 783                 // There are more entries. Read an extension header and parse its entries.
 784                 sparseHeader := make([]byte, blockSize)
 785                 if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
 786                         return nil
 787                 }
 788                 if tr.RawAccounting {
 789                         if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
 790                                 return nil
 791                         }
 792                 }
 793
 794                 isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
 795                 s = slicer(sparseHeader)
 796                 for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
 797                         offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize))
 798                         numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize))
 799                         if p.err != nil {
 800                                 tr.err = p.err
 801                                 return nil
 802                         }
 803                         if offset == 0 && numBytes == 0 {
 804                                 break
 805                         }
 806                         sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 807                 }
 808         }
 809         return sp
 810 }
 811
 812 // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
 813 // version 1.0. The format of the sparse map consists of a series of
 814 // newline-terminated numeric fields. The first field is the number of entries
 815 // and is always present. Following this are the entries, consisting of two
 816 // fields (offset, numBytes). This function must stop reading at the end
 817 // boundary of the block containing the last newline.
 818 //
 819 // Note that the GNU manual says that numeric values should be encoded in octal
 820 // format. However, the GNU tar utility itself outputs these values in decimal.
 821 // As such, this library treats values as being encoded in decimal.
 822 func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
 823         var cntNewline int64
 824         var buf bytes.Buffer
 825         var blk = make([]byte, blockSize)
 826
 827         // feedTokens copies data in numBlock chunks from r into buf until there are
 828         // at least cnt newlines in buf. It will not read more blocks than needed.
 829         var feedTokens = func(cnt int64) error {
 830                 for cntNewline < cnt {
 831                         if _, err := io.ReadFull(r, blk); err != nil {
 832                                 if err == io.EOF {
 833                                         err = io.ErrUnexpectedEOF
 834                                 }
 835                                 return err
 836                         }
 837                         buf.Write(blk)
 838                         for _, c := range blk {
 839                                 if c == '\n' {
 840                                         cntNewline++
 841                                 }
 842                         }
 843                 }
 844                 return nil
 845         }
 846
 847         // nextToken gets the next token delimited by a newline. This assumes that
 848         // at least one newline exists in the buffer.
 849         var nextToken = func() string {
 850                 cntNewline--
 851                 tok, _ := buf.ReadString('\n')
 852                 return tok[:len(tok)-1] // Cut off newline
 853         }
 854
 855         // Parse for the number of entries.
 856         // Use integer overflow resistant math to check this.
 857         if err := feedTokens(1); err != nil {
 858                 return nil, err
 859         }
 860         numEntries, err := strconv.ParseInt(nextToken(), 10, 0) // Intentionally parse as native int
 861         if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
 862                 return nil, ErrHeader
 863         }
 864
 865         // Parse for all member entries.
 866         // numEntries is trusted after this since a potential attacker must have
 867         // committed resources proportional to what this library used.
 868         if err := feedTokens(2 * numEntries); err != nil {
 869                 return nil, err
 870         }
 871         sp := make([]sparseEntry, 0, numEntries)
 872         for i := int64(0); i < numEntries; i++ {
 873                 offset, err := strconv.ParseInt(nextToken(), 10, 64)
 874                 if err != nil {
 875                         return nil, ErrHeader
 876                 }
 877                 numBytes, err := strconv.ParseInt(nextToken(), 10, 64)
 878                 if err != nil {
 879                         return nil, ErrHeader
 880                 }
 881                 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 882         }
 883         return sp, nil
 884 }
 885
 886 // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format
 887 // version 0.1. The sparse map is stored in the PAX headers.
 888 func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) {
 889         // Get number of entries.
 890         // Use integer overflow resistant math to check this.
 891         numEntriesStr := extHdrs[paxGNUSparseNumBlocks]
 892         numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int
 893         if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
 894                 return nil, ErrHeader
 895         }
 896
 897         // There should be two numbers in sparseMap for each entry.
 898         sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",")
 899         if int64(len(sparseMap)) != 2*numEntries {
 900                 return nil, ErrHeader
 901         }
 902
 903         // Loop through the entries in the sparse map.
 904         // numEntries is trusted now.
 905         sp := make([]sparseEntry, 0, numEntries)
 906         for i := int64(0); i < numEntries; i++ {
 907                 offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64)
 908                 if err != nil {
 909                         return nil, ErrHeader
 910                 }
 911                 numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64)
 912                 if err != nil {
 913                         return nil, ErrHeader
 914                 }
 915                 sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
 916         }
 917         return sp, nil
 918 }
 919
 920 // numBytes returns the number of bytes left to read in the current file's entry
 921 // in the tar archive, or 0 if there is no current file.
 922 func (tr *Reader) numBytes() int64 {
 923         if tr.curr == nil {
 924                 // No current file, so no bytes
 925                 return 0
 926         }
 927         return tr.curr.numBytes()
 928 }
 929
 930 // Read reads from the current entry in the tar archive.
 931 // It returns 0, io.EOF when it reaches the end of that entry,
 932 // until Next is called to advance to the next entry.
 933 //
 934 // Calling Read on special types like TypeLink, TypeSymLink, TypeChar,
 935 // TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what
 936 // the Header.Size claims.
 937 func (tr *Reader) Read(b []byte) (n int, err error) {
 938         if tr.err != nil {
 939                 return 0, tr.err
 940         }
 941         if tr.curr == nil {
 942                 return 0, io.EOF
 943         }
 944
 945         n, err = tr.curr.Read(b)
 946         if err != nil && err != io.EOF {
 947                 tr.err = err
 948         }
 949         return
 950 }
 951
 952 func (rfr *regFileReader) Read(b []byte) (n int, err error) {
 953         if rfr.nb == 0 {
 954                 // file consumed
 955                 return 0, io.EOF
 956         }
 957         if int64(len(b)) > rfr.nb {
 958                 b = b[0:rfr.nb]
 959         }
 960         n, err = rfr.r.Read(b)
 961         rfr.nb -= int64(n)
 962
 963         if err == io.EOF && rfr.nb > 0 {
 964                 err = io.ErrUnexpectedEOF
 965         }
 966         return
 967 }
 968
 969 // numBytes returns the number of bytes left to read in the file's data in the tar archive.
 970 func (rfr *regFileReader) numBytes() int64 {
 971         return rfr.nb
 972 }
 973
 974 // newSparseFileReader creates a new sparseFileReader, but validates all of the
 975 // sparse entries before doing so.
 976 func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) {
 977         if total < 0 {
 978                 return nil, ErrHeader // Total size cannot be negative
 979         }
 980
 981         // Validate all sparse entries. These are the same checks as performed by
 982         // the BSD tar utility.
 983         for i, s := range sp {
 984                 switch {
 985                 case s.offset < 0 || s.numBytes < 0:
 986                         return nil, ErrHeader // Negative values are never okay
 987                 case s.offset > math.MaxInt64-s.numBytes:
 988                         return nil, ErrHeader // Integer overflow with large length
 989                 case s.offset+s.numBytes > total:
 990                         return nil, ErrHeader // Region extends beyond the "real" size
 991                 case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset:
 992                         return nil, ErrHeader // Regions can't overlap and must be in order
 993                 }
 994         }
 995         return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil
 996 }
 997
 998 // readHole reads a sparse hole ending at endOffset.
 999 func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int {
1000         n64 := endOffset - sfr.pos
1001         if n64 > int64(len(b)) {
1002                 n64 = int64(len(b))
1003         }
1004         n := int(n64)
1005         for i := 0; i < n; i++ {
1006                 b[i] = 0
1007         }
1008         sfr.pos += n64
1009         return n
1010 }
1011
1012 // Read reads the sparse file data in expanded form.
1013 func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
1014         // Skip past all empty fragments.
1015         for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 {
1016                 sfr.sp = sfr.sp[1:]
1017         }
1018
1019         // If there are no more fragments, then it is possible that there
1020         // is one last sparse hole.
1021         if len(sfr.sp) == 0 {
1022                 // This behavior matches the BSD tar utility.
1023                 // However, GNU tar stops returning data even if sfr.total is unmet.
1024                 if sfr.pos < sfr.total {
1025                         return sfr.readHole(b, sfr.total), nil
1026                 }
1027                 return 0, io.EOF
1028         }
1029
1030         // In front of a data fragment, so read a hole.
1031         if sfr.pos < sfr.sp[0].offset {
1032                 return sfr.readHole(b, sfr.sp[0].offset), nil
1033         }
1034
1035         // In a data fragment, so read from it.
1036         // This math is overflow free since we verify that offset and numBytes can
1037         // be safely added when creating the sparseFileReader.
1038         endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment
1039         bytesLeft := endPos - sfr.pos                   // Bytes left in fragment
1040         if int64(len(b)) > bytesLeft {
1041                 b = b[:bytesLeft]
1042         }
1043
1044         n, err = sfr.rfr.Read(b)
1045         sfr.pos += int64(n)
1046         if err == io.EOF {
1047                 if sfr.pos < endPos {
1048                         err = io.ErrUnexpectedEOF // There was supposed to be more data
1049                 } else if sfr.pos < sfr.total {
1050                         err = nil // There is still an implicit sparse hole at the end
1051                 }
1052         }
1053
1054         if sfr.pos == endPos {
1055                 sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it
1056         }
1057         return n, err
1058 }
1059
1060 // numBytes returns the number of bytes left to read in the sparse file's
1061 // sparse-encoded data in the tar archive.
1062 func (sfr *sparseFileReader) numBytes() int64 {
1063         return sfr.rfr.numBytes()
1064 }