libgo/go/fmt/scan.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package fmt
   6
   7 import (
   8         "bytes"
   9         "io"
  10         "os"
  11         "reflect"
  12         "strconv"
  13         "strings"
  14         "unicode"
  15         "utf8"
  16 )
  17
  18 // readRuner is the interface to something that can read runes.  If
  19 // the object provided to Scan does not satisfy this interface, the
  20 // object will be wrapped by a readRune object.
  21 type readRuner interface {
  22         ReadRune() (rune int, size int, err os.Error)
  23 }
  24
  25 // unreadRuner is the interface to something that can unread runes.
  26 // If the object provided to Scan does not satisfy this interface,
  27 // a local buffer will be used to back up the input, but its contents
  28 // will be lost when Scan returns.
  29 type unreadRuner interface {
  30         UnreadRune() os.Error
  31 }
  32
  33 // ScanState represents the scanner state passed to custom scanners.
  34 // Scanners may do rune-at-a-time scanning or ask the ScanState
  35 // to discover the next space-delimited token.
  36 type ScanState interface {
  37         // GetRune reads the next rune (Unicode code point) from the input.
  38         GetRune() (rune int, err os.Error)
  39         // UngetRune causes the next call to GetRune to return the rune.
  40         UngetRune()
  41         // Width returns the value of the width option and whether it has been set.
  42         // The unit is Unicode code points.
  43         Width() (wid int, ok bool)
  44         // Token returns the next space-delimited token from the input. If
  45         // a width has been specified, the returned token will be no longer
  46         // than the width.
  47         Token() (token string, err os.Error)
  48 }
  49
  50 // Scanner is implemented by any value that has a Scan method, which scans
  51 // the input for the representation of a value and stores the result in the
  52 // receiver, which must be a pointer to be useful.  The Scan method is called
  53 // for any argument to Scan or Scanln that implements it.
  54 type Scanner interface {
  55         Scan(state ScanState, verb int) os.Error
  56 }
  57
  58 // Scan scans text read from standard input, storing successive
  59 // space-separated values into successive arguments.  Newlines count
  60 // as space.  It returns the number of items successfully scanned.
  61 // If that is less than the number of arguments, err will report why.
  62 func Scan(a ...interface{}) (n int, err os.Error) {
  63         return Fscan(os.Stdin, a...)
  64 }
  65
  66 // Scanln is similar to Scan, but stops scanning at a newline and
  67 // after the final item there must be a newline or EOF.
  68 func Scanln(a ...interface{}) (n int, err os.Error) {
  69         return Fscanln(os.Stdin, a...)
  70 }
  71
  72 // Scanf scans text read from standard input, storing successive
  73 // space-separated values into successive arguments as determined by
  74 // the format.  It returns the number of items successfully scanned.
  75 func Scanf(format string, a ...interface{}) (n int, err os.Error) {
  76         return Fscanf(os.Stdin, format, a...)
  77 }
  78
  79 // Sscan scans the argument string, storing successive space-separated
  80 // values into successive arguments.  Newlines count as space.  It
  81 // returns the number of items successfully scanned.  If that is less
  82 // than the number of arguments, err will report why.
  83 func Sscan(str string, a ...interface{}) (n int, err os.Error) {
  84         return Fscan(strings.NewReader(str), a...)
  85 }
  86
  87 // Sscanln is similar to Sscan, but stops scanning at a newline and
  88 // after the final item there must be a newline or EOF.
  89 func Sscanln(str string, a ...interface{}) (n int, err os.Error) {
  90         return Fscanln(strings.NewReader(str), a...)
  91 }
  92
  93 // Sscanf scans the argument string, storing successive space-separated
  94 // values into successive arguments as determined by the format.  It
  95 // returns the number of items successfully parsed.
  96 func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) {
  97         return Fscanf(strings.NewReader(str), format, a...)
  98 }
  99
 100 // Fscan scans text read from r, storing successive space-separated
 101 // values into successive arguments.  Newlines count as space.  It
 102 // returns the number of items successfully scanned.  If that is less
 103 // than the number of arguments, err will report why.
 104 func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) {
 105         s := newScanState(r, true)
 106         n, err = s.doScan(a)
 107         s.free()
 108         return
 109 }
 110
 111 // Fscanln is similar to Fscan, but stops scanning at a newline and
 112 // after the final item there must be a newline or EOF.
 113 func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) {
 114         s := newScanState(r, false)
 115         n, err = s.doScan(a)
 116         s.free()
 117         return
 118 }
 119
 120 // Fscanf scans text read from r, storing successive space-separated
 121 // values into successive arguments as determined by the format.  It
 122 // returns the number of items successfully parsed.
 123 func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) {
 124         s := newScanState(r, false)
 125         n, err = s.doScanf(format, a)
 126         s.free()
 127         return
 128 }
 129
 130 // scanError represents an error generated by the scanning software.
 131 // It's used as a unique signature to identify such errors when recovering.
 132 type scanError struct {
 133         err os.Error
 134 }
 135
 136 const EOF = -1
 137
 138 // ss is the internal implementation of ScanState.
 139 type ss struct {
 140         rr         readRuner    // where to read input
 141         buf        bytes.Buffer // token accumulator
 142         nlIsSpace  bool         // whether newline counts as white space
 143         peekRune   int          // one-rune lookahead
 144         prevRune   int          // last rune returned by GetRune
 145         atEOF      bool         // already read EOF
 146         maxWid     int          // max width of field, in runes
 147         widPresent bool         // width was specified
 148         wid        int          // width consumed so far; used in accept()
 149 }
 150
 151 func (s *ss) GetRune() (rune int, err os.Error) {
 152         if s.peekRune >= 0 {
 153                 rune = s.peekRune
 154                 s.prevRune = rune
 155                 s.peekRune = -1
 156                 return
 157         }
 158         rune, _, err = s.rr.ReadRune()
 159         if err == nil {
 160                 s.prevRune = rune
 161         }
 162         return
 163 }
 164
 165 func (s *ss) Width() (wid int, ok bool) {
 166         return s.maxWid, s.widPresent
 167 }
 168
 169 // The public method returns an error; this private one panics.
 170 // If getRune reaches EOF, the return value is EOF (-1).
 171 func (s *ss) getRune() (rune int) {
 172         if s.atEOF {
 173                 return EOF
 174         }
 175         if s.peekRune >= 0 {
 176                 rune = s.peekRune
 177                 s.prevRune = rune
 178                 s.peekRune = -1
 179                 return
 180         }
 181         rune, _, err := s.rr.ReadRune()
 182         if err == nil {
 183                 s.prevRune = rune
 184         } else if err != nil {
 185                 if err == os.EOF {
 186                         s.atEOF = true
 187                         return EOF
 188                 }
 189                 s.error(err)
 190         }
 191         return
 192 }
 193
 194 // mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF).
 195 // It is called in cases such as string scanning where an EOF is a
 196 // syntax error.
 197 func (s *ss) mustGetRune() (rune int) {
 198         if s.atEOF {
 199                 s.error(io.ErrUnexpectedEOF)
 200         }
 201         if s.peekRune >= 0 {
 202                 rune = s.peekRune
 203                 s.peekRune = -1
 204                 return
 205         }
 206         rune, _, err := s.rr.ReadRune()
 207         if err != nil {
 208                 if err == os.EOF {
 209                         err = io.ErrUnexpectedEOF
 210                 }
 211                 s.error(err)
 212         }
 213         return
 214 }
 215
 216
 217 func (s *ss) UngetRune() {
 218         if u, ok := s.rr.(unreadRuner); ok {
 219                 u.UnreadRune()
 220         } else {
 221                 s.peekRune = s.prevRune
 222         }
 223 }
 224
 225 func (s *ss) error(err os.Error) {
 226         panic(scanError{err})
 227 }
 228
 229 func (s *ss) errorString(err string) {
 230         panic(scanError{os.ErrorString(err)})
 231 }
 232
 233 func (s *ss) Token() (tok string, err os.Error) {
 234         defer func() {
 235                 if e := recover(); e != nil {
 236                         if se, ok := e.(scanError); ok {
 237                                 err = se.err
 238                         } else {
 239                                 panic(e)
 240                         }
 241                 }
 242         }()
 243         tok = s.token()
 244         return
 245 }
 246
 247 // readRune is a structure to enable reading UTF-8 encoded code points
 248 // from an io.Reader.  It is used if the Reader given to the scanner does
 249 // not already implement ReadRuner.
 250 type readRune struct {
 251         reader  io.Reader
 252         buf     [utf8.UTFMax]byte // used only inside ReadRune
 253         pending int               // number of bytes in pendBuf; only >0 for bad UTF-8
 254         pendBuf [utf8.UTFMax]byte // bytes left over
 255 }
 256
 257 // readByte returns the next byte from the input, which may be
 258 // left over from a previous read if the UTF-8 was ill-formed.
 259 func (r *readRune) readByte() (b byte, err os.Error) {
 260         if r.pending > 0 {
 261                 b = r.pendBuf[0]
 262                 copy(r.pendBuf[0:], r.pendBuf[1:])
 263                 r.pending--
 264                 return
 265         }
 266         _, err = r.reader.Read(r.pendBuf[0:1])
 267         return r.pendBuf[0], err
 268 }
 269
 270 // unread saves the bytes for the next read.
 271 func (r *readRune) unread(buf []byte) {
 272         copy(r.pendBuf[r.pending:], buf)
 273         r.pending += len(buf)
 274 }
 275
 276 // ReadRune returns the next UTF-8 encoded code point from the
 277 // io.Reader inside r.
 278 func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
 279         r.buf[0], err = r.readByte()
 280         if err != nil {
 281                 return 0, 0, err
 282         }
 283         if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
 284                 rune = int(r.buf[0])
 285                 return
 286         }
 287         var n int
 288         for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
 289                 r.buf[n], err = r.readByte()
 290                 if err != nil {
 291                         if err == os.EOF {
 292                                 err = nil
 293                                 break
 294                         }
 295                         return
 296                 }
 297         }
 298         rune, size = utf8.DecodeRune(r.buf[0:n])
 299         if size < n { // an error
 300                 r.unread(r.buf[size:n])
 301         }
 302         return
 303 }
 304
 305
 306 // A leaky bucket of reusable ss structures.
 307 var ssFree = make(chan *ss, 100)
 308
 309 // Allocate a new ss struct.  Probably can grab the previous one from ssFree.
 310 func newScanState(r io.Reader, nlIsSpace bool) *ss {
 311         s, ok := <-ssFree
 312         if !ok {
 313                 s = new(ss)
 314         }
 315         if rr, ok := r.(readRuner); ok {
 316                 s.rr = rr
 317         } else {
 318                 s.rr = &readRune{reader: r}
 319         }
 320         s.nlIsSpace = nlIsSpace
 321         s.peekRune = -1
 322         s.atEOF = false
 323         s.maxWid = 0
 324         s.widPresent = false
 325         return s
 326 }
 327
 328 // Save used ss structs in ssFree; avoid an allocation per invocation.
 329 func (s *ss) free() {
 330         // Don't hold on to ss structs with large buffers.
 331         if cap(s.buf.Bytes()) > 1024 {
 332                 return
 333         }
 334         s.buf.Reset()
 335         s.rr = nil
 336         _ = ssFree <- s
 337 }
 338
 339 // skipSpace skips spaces and maybe newlines.
 340 func (s *ss) skipSpace(stopAtNewline bool) {
 341         for {
 342                 rune := s.getRune()
 343                 if rune == EOF {
 344                         return
 345                 }
 346                 if rune == '\n' {
 347                         if stopAtNewline {
 348                                 break
 349                         }
 350                         if s.nlIsSpace {
 351                                 continue
 352                         }
 353                         s.errorString("unexpected newline")
 354                         return
 355                 }
 356                 if !unicode.IsSpace(rune) {
 357                         s.UngetRune()
 358                         break
 359                 }
 360         }
 361 }
 362
 363 // token returns the next space-delimited string from the input.  It
 364 // skips white space.  For Scanln, it stops at newlines.  For Scan,
 365 // newlines are treated as spaces.
 366 func (s *ss) token() string {
 367         s.skipSpace(false)
 368         // read until white space or newline
 369         for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ {
 370                 rune := s.getRune()
 371                 if rune == EOF {
 372                         break
 373                 }
 374                 if unicode.IsSpace(rune) {
 375                         s.UngetRune()
 376                         break
 377                 }
 378                 s.buf.WriteRune(rune)
 379         }
 380         return s.buf.String()
 381 }
 382
 383 // typeError indicates that the type of the operand did not match the format
 384 func (s *ss) typeError(field interface{}, expected string) {
 385         s.errorString("expected field of type pointer to " + expected + "; found " + reflect.Typeof(field).String())
 386 }
 387
 388 var complexError = os.ErrorString("syntax error scanning complex number")
 389 var boolError = os.ErrorString("syntax error scanning boolean")
 390
 391 // consume reads the next rune in the input and reports whether it is in the ok string.
 392 // If accept is true, it puts the character into the input token.
 393 func (s *ss) consume(ok string, accept bool) bool {
 394         if s.wid >= s.maxWid {
 395                 return false
 396         }
 397         rune := s.getRune()
 398         if rune == EOF {
 399                 return false
 400         }
 401         for i := 0; i < len(ok); i++ {
 402                 if int(ok[i]) == rune {
 403                         if accept {
 404                                 s.buf.WriteRune(rune)
 405                                 s.wid++
 406                         }
 407                         return true
 408                 }
 409         }
 410         if rune != EOF && accept {
 411                 s.UngetRune()
 412         }
 413         return false
 414 }
 415
 416 // accept checks the next rune in the input.  If it's a byte (sic) in the string, it puts it in the
 417 // buffer and returns true. Otherwise it return false.
 418 func (s *ss) accept(ok string) bool {
 419         return s.consume(ok, true)
 420 }
 421
 422 // okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
 423 func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
 424         for _, v := range okVerbs {
 425                 if v == verb {
 426                         return true
 427                 }
 428         }
 429         s.errorString("bad verb %" + string(verb) + " for " + typ)
 430         return false
 431 }
 432
 433 // scanBool returns the value of the boolean represented by the next token.
 434 func (s *ss) scanBool(verb int) bool {
 435         if !s.okVerb(verb, "tv", "boolean") {
 436                 return false
 437         }
 438         // Syntax-checking a boolean is annoying.  We're not fastidious about case.
 439         switch s.mustGetRune() {
 440         case '0':
 441                 return false
 442         case '1':
 443                 return true
 444         case 't', 'T':
 445                 if s.accept("rR") && (!s.accept("uU") || !s.accept("eE")) {
 446                         s.error(boolError)
 447                 }
 448                 return true
 449         case 'f', 'F':
 450                 if s.accept("aL") && (!s.accept("lL") || !s.accept("sS") || !s.accept("eE")) {
 451                         s.error(boolError)
 452                 }
 453                 return false
 454         }
 455         return false
 456 }
 457
 458 // Numerical elements
 459 const (
 460         binaryDigits      = "01"
 461         octalDigits       = "01234567"
 462         decimalDigits     = "0123456789"
 463         hexadecimalDigits = "0123456789aAbBcCdDeEfF"
 464         sign              = "+-"
 465         period            = "."
 466         exponent          = "eE"
 467 )
 468
 469 // getBase returns the numeric base represented by the verb and its digit string.
 470 func (s *ss) getBase(verb int) (base int, digits string) {
 471         s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
 472         base = 10
 473         digits = decimalDigits
 474         switch verb {
 475         case 'b':
 476                 base = 2
 477                 digits = binaryDigits
 478         case 'o':
 479                 base = 8
 480                 digits = octalDigits
 481         case 'x', 'X', 'U':
 482                 base = 16
 483                 digits = hexadecimalDigits
 484         }
 485         return
 486 }
 487
 488 // scanNumber returns the numerical string with specified digits starting here.
 489 func (s *ss) scanNumber(digits string) string {
 490         if !s.accept(digits) {
 491                 s.errorString("expected integer")
 492         }
 493         for s.accept(digits) {
 494         }
 495         return s.buf.String()
 496 }
 497
 498 // scanRune returns the next rune value in the input.
 499 func (s *ss) scanRune(bitSize int) int64 {
 500         rune := int64(s.mustGetRune())
 501         n := uint(bitSize)
 502         x := (rune << (64 - n)) >> (64 - n)
 503         if x != rune {
 504                 s.errorString("overflow on character value " + string(rune))
 505         }
 506         return rune
 507 }
 508
 509 // scanInt returns the value of the integer represented by the next
 510 // token, checking for overflow.  Any error is stored in s.err.
 511 func (s *ss) scanInt(verb int, bitSize int) int64 {
 512         if verb == 'c' {
 513                 return s.scanRune(bitSize)
 514         }
 515         base, digits := s.getBase(verb)
 516         s.skipSpace(false)
 517         if verb == 'U' {
 518                 if !s.consume("U", false) || !s.consume("+", false) {
 519                         s.errorString("bad unicode format ")
 520                 }
 521         } else {
 522                 s.accept(sign) // If there's a sign, it will be left in the token buffer.
 523         }
 524         tok := s.scanNumber(digits)
 525         i, err := strconv.Btoi64(tok, base)
 526         if err != nil {
 527                 s.error(err)
 528         }
 529         n := uint(bitSize)
 530         x := (i << (64 - n)) >> (64 - n)
 531         if x != i {
 532                 s.errorString("integer overflow on token " + tok)
 533         }
 534         return i
 535 }
 536
 537 // scanUint returns the value of the unsigned integer represented
 538 // by the next token, checking for overflow.  Any error is stored in s.err.
 539 func (s *ss) scanUint(verb int, bitSize int) uint64 {
 540         if verb == 'c' {
 541                 return uint64(s.scanRune(bitSize))
 542         }
 543         base, digits := s.getBase(verb)
 544         s.skipSpace(false)
 545         if verb == 'U' {
 546                 if !s.consume("U", false) || !s.consume("+", false) {
 547                         s.errorString("bad unicode format ")
 548                 }
 549         }
 550         tok := s.scanNumber(digits)
 551         i, err := strconv.Btoui64(tok, base)
 552         if err != nil {
 553                 s.error(err)
 554         }
 555         n := uint(bitSize)
 556         x := (i << (64 - n)) >> (64 - n)
 557         if x != i {
 558                 s.errorString("unsigned integer overflow on token " + tok)
 559         }
 560         return i
 561 }
 562
 563 // floatToken returns the floating-point number starting here, no longer than swid
 564 // if the width is specified. It's not rigorous about syntax because it doesn't check that
 565 // we have at least some digits, but Atof will do that.
 566 func (s *ss) floatToken() string {
 567         s.buf.Reset()
 568         // NaN?
 569         if s.accept("nN") && s.accept("aA") && s.accept("nN") {
 570                 return s.buf.String()
 571         }
 572         // leading sign?
 573         s.accept(sign)
 574         // Inf?
 575         if s.accept("iI") && s.accept("nN") && s.accept("fF") {
 576                 return s.buf.String()
 577         }
 578         // digits?
 579         for s.accept(decimalDigits) {
 580         }
 581         // decimal point?
 582         if s.accept(period) {
 583                 // fraction?
 584                 for s.accept(decimalDigits) {
 585                 }
 586         }
 587         // exponent?
 588         if s.accept(exponent) {
 589                 // leading sign?
 590                 s.accept(sign)
 591                 // digits?
 592                 for s.accept(decimalDigits) {
 593                 }
 594         }
 595         return s.buf.String()
 596 }
 597
 598 // complexTokens returns the real and imaginary parts of the complex number starting here.
 599 // The number might be parenthesized and has the format (N+Ni) where N is a floating-point
 600 // number and there are no spaces within.
 601 func (s *ss) complexTokens() (real, imag string) {
 602         // TODO: accept N and Ni independently?
 603         parens := s.accept("(")
 604         real = s.floatToken()
 605         s.buf.Reset()
 606         // Must now have a sign.
 607         if !s.accept("+-") {
 608                 s.error(complexError)
 609         }
 610         // Sign is now in buffer
 611         imagSign := s.buf.String()
 612         imag = s.floatToken()
 613         if !s.accept("i") {
 614                 s.error(complexError)
 615         }
 616         if parens && !s.accept(")") {
 617                 s.error(complexError)
 618         }
 619         return real, imagSign + imag
 620 }
 621
 622 // convertFloat converts the string to a float64value.
 623 func (s *ss) convertFloat(str string, n int) float64 {
 624         f, err := strconv.AtofN(str, n)
 625         if err != nil {
 626                 s.error(err)
 627         }
 628         return f
 629 }
 630
 631 // convertComplex converts the next token to a complex128 value.
 632 // The atof argument is a type-specific reader for the underlying type.
 633 // If we're reading complex64, atof will parse float32s and convert them
 634 // to float64's to avoid reproducing this code for each complex type.
 635 func (s *ss) scanComplex(verb int, n int) complex128 {
 636         if !s.okVerb(verb, floatVerbs, "complex") {
 637                 return 0
 638         }
 639         s.skipSpace(false)
 640         sreal, simag := s.complexTokens()
 641         real := s.convertFloat(sreal, n/2)
 642         imag := s.convertFloat(simag, n/2)
 643         return complex(real, imag)
 644 }
 645
 646 // convertString returns the string represented by the next input characters.
 647 // The format of the input is determined by the verb.
 648 func (s *ss) convertString(verb int) (str string) {
 649         if !s.okVerb(verb, "svqx", "string") {
 650                 return ""
 651         }
 652         s.skipSpace(false)
 653         switch verb {
 654         case 'q':
 655                 str = s.quotedString()
 656         case 'x':
 657                 str = s.hexString()
 658         default:
 659                 str = s.token() // %s and %v just return the next word
 660         }
 661         // Empty strings other than with %q are not OK.
 662         if len(str) == 0 && verb != 'q' && s.maxWid > 0 {
 663                 s.errorString("Scan: no data for string")
 664         }
 665         return
 666 }
 667
 668 // quotedString returns the double- or back-quoted string represented by the next input characters.
 669 func (s *ss) quotedString() string {
 670         quote := s.mustGetRune()
 671         switch quote {
 672         case '`':
 673                 // Back-quoted: Anything goes until EOF or back quote.
 674                 for {
 675                         rune := s.mustGetRune()
 676                         if rune == quote {
 677                                 break
 678                         }
 679                         s.buf.WriteRune(rune)
 680                 }
 681                 return s.buf.String()
 682         case '"':
 683                 // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes.
 684                 s.buf.WriteRune(quote)
 685                 for {
 686                         rune := s.mustGetRune()
 687                         s.buf.WriteRune(rune)
 688                         if rune == '\\' {
 689                                 // In a legal backslash escape, no matter how long, only the character
 690                                 // immediately after the escape can itself be a backslash or quote.
 691                                 // Thus we only need to protect the first character after the backslash.
 692                                 rune := s.mustGetRune()
 693                                 s.buf.WriteRune(rune)
 694                         } else if rune == '"' {
 695                                 break
 696                         }
 697                 }
 698                 result, err := strconv.Unquote(s.buf.String())
 699                 if err != nil {
 700                         s.error(err)
 701                 }
 702                 return result
 703         default:
 704                 s.errorString("expected quoted string")
 705         }
 706         return ""
 707 }
 708
 709 // hexDigit returns the value of the hexadecimal digit
 710 func (s *ss) hexDigit(digit int) int {
 711         switch digit {
 712         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 713                 return digit - '0'
 714         case 'a', 'b', 'c', 'd', 'e', 'f':
 715                 return 10 + digit - 'a'
 716         case 'A', 'B', 'C', 'D', 'E', 'F':
 717                 return 10 + digit - 'A'
 718         }
 719         s.errorString("Scan: illegal hex digit")
 720         return 0
 721 }
 722
 723 // hexByte returns the next hex-encoded (two-character) byte from the input.
 724 // There must be either two hexadecimal digits or a space character in the input.
 725 func (s *ss) hexByte() (b byte, ok bool) {
 726         rune1 := s.getRune()
 727         if rune1 == EOF {
 728                 return
 729         }
 730         if unicode.IsSpace(rune1) {
 731                 s.UngetRune()
 732                 return
 733         }
 734         rune2 := s.mustGetRune()
 735         return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true
 736 }
 737
 738 // hexString returns the space-delimited hexpair-encoded string.
 739 func (s *ss) hexString() string {
 740         for {
 741                 b, ok := s.hexByte()
 742                 if !ok {
 743                         break
 744                 }
 745                 s.buf.WriteByte(b)
 746         }
 747         if s.buf.Len() == 0 {
 748                 s.errorString("Scan: no hex data for %x string")
 749                 return ""
 750         }
 751         return s.buf.String()
 752 }
 753
 754 const floatVerbs = "eEfFgGv"
 755
// scanOne scans a single value, deriving the scanner from the type of the argument.
// field is expected to be a pointer to the value to fill in.  The
// common pointer types are handled directly by the type switch; any
// other type falls back to reflection.  Errors are reported by
// panicking with a scanError (see s.error and s.errorString).
func (s *ss) scanOne(verb int, field interface{}) {
	// Every value starts with an empty token buffer.
	s.buf.Reset()
	var err os.Error
	// If the parameter has its own Scan method, use that.
	if v, ok := field.(Scanner); ok {
		err = v.Scan(s, verb)
		if err != nil {
			s.error(err)
		}
		return
	}
	// With no explicit width, make the limit large enough that consume
	// never stops on it.
	if !s.widPresent {
		s.maxWid = 1 << 30 // Huge
	}
	// Nothing has been consumed from this field yet.
	s.wid = 0
	switch v := field.(type) {
	case *bool:
		*v = s.scanBool(verb)
	case *complex64:
		*v = complex64(s.scanComplex(verb, 64))
	case *complex128:
		*v = s.scanComplex(verb, 128)
	case *int:
		*v = int(s.scanInt(verb, intBits))
	case *int8:
		*v = int8(s.scanInt(verb, 8))
	case *int16:
		*v = int16(s.scanInt(verb, 16))
	case *int32:
		*v = int32(s.scanInt(verb, 32))
	case *int64:
		*v = s.scanInt(verb, 64)
	case *uint:
		*v = uint(s.scanUint(verb, intBits))
	case *uint8:
		*v = uint8(s.scanUint(verb, 8))
	case *uint16:
		*v = uint16(s.scanUint(verb, 16))
	case *uint32:
		*v = uint32(s.scanUint(verb, 32))
	case *uint64:
		*v = s.scanUint(verb, 64)
	case *uintptr:
		*v = uintptr(s.scanUint(verb, uintptrBits))
	// Floats are tricky because you want to scan in the precision of the result, not
	// scan in high precision and convert, in order to preserve the correct error condition.
	case *float32:
		if s.okVerb(verb, floatVerbs, "float32") {
			s.skipSpace(false)
			*v = float32(s.convertFloat(s.floatToken(), 32))
		}
	case *float64:
		if s.okVerb(verb, floatVerbs, "float64") {
			s.skipSpace(false)
			*v = s.convertFloat(s.floatToken(), 64)
		}
	case *string:
		*v = s.convertString(verb)
	case *[]byte:
		// We scan to string and convert so we get a copy of the data.
		// If we scanned to bytes, the slice would point at the buffer.
		*v = []byte(s.convertString(verb))
	default:
		// Not one of the types listed above: inspect the operand with
		// reflection.  It must still be a pointer.
		val := reflect.NewValue(v)
		ptr, ok := val.(*reflect.PtrValue)
		if !ok {
			s.errorString("Scan: type not a pointer: " + val.Type().String())
			return
		}
		// Dispatch on the kind of value pointed to; the bit size comes
		// from the value's own type.
		switch v := ptr.Elem().(type) {
		case *reflect.BoolValue:
			v.Set(s.scanBool(verb))
		case *reflect.IntValue:
			v.Set(s.scanInt(verb, v.Type().Bits()))
		case *reflect.UintValue:
			v.Set(s.scanUint(verb, v.Type().Bits()))
		case *reflect.StringValue:
			v.Set(s.convertString(verb))
		case *reflect.SliceValue:
			// For now, can only handle (renamed) []byte.
			typ := v.Type().(*reflect.SliceType)
			if typ.Elem().Kind() != reflect.Uint8 {
				goto CantHandle
			}
			str := s.convertString(verb)
			// Allocate a slice of the right length and copy the scanned
			// bytes into it one element at a time.
			v.Set(reflect.MakeSlice(typ, len(str), len(str)))
			for i := 0; i < len(str); i++ {
				v.Elem(i).(*reflect.UintValue).Set(uint64(str[i]))
			}
		case *reflect.FloatValue:
			// NOTE(review): unlike the *float32 and *float64 cases above,
			// the verb is not checked with okVerb here - confirm this is
			// intended.
			s.skipSpace(false)
			v.Set(s.convertFloat(s.floatToken(), v.Type().Bits()))
		case *reflect.ComplexValue:
			v.Set(s.scanComplex(verb, v.Type().Bits()))
		default:
		CantHandle:
			s.errorString("Scan: can't handle type: " + val.Type().String())
		}
	}
}
 857
 858 // errorHandler turns local panics into error returns.  EOFs are benign.
 859 func errorHandler(errp *os.Error) {
 860         if e := recover(); e != nil {
 861                 if se, ok := e.(scanError); ok { // catch local error
 862                         if se.err != os.EOF {
 863                                 *errp = se.err
 864                         }
 865                 } else {
 866                         panic(e)
 867                 }
 868         }
 869 }
 870
 871 // doScan does the real work for scanning without a format string.
 872 // At the moment, it handles only pointers to basic types.
 873 func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) {
 874         defer errorHandler(&err)
 875         for _, field := range a {
 876                 s.scanOne('v', field)
 877                 numProcessed++
 878         }
 879         // Check for newline if required.
 880         if !s.nlIsSpace {
 881                 for {
 882                         rune := s.getRune()
 883                         if rune == '\n' || rune == EOF {
 884                                 break
 885                         }
 886                         if !unicode.IsSpace(rune) {
 887                                 s.errorString("Scan: expected newline")
 888                                 break
 889                         }
 890                 }
 891         }
 892         return
 893 }
 894
 895 // advance determines whether the next characters in the input match
 896 // those of the format.  It returns the number of bytes (sic) consumed
 897 // in the format. Newlines included, all runs of space characters in
 898 // either input or format behave as a single space. This routine also
 899 // handles the %% case.  If the return value is zero, either format
 900 // starts with a % (with no following %) or the input is empty.
 901 // If it is negative, the input did not match the string.
 902 func (s *ss) advance(format string) (i int) {
 903         for i < len(format) {
 904                 fmtc, w := utf8.DecodeRuneInString(format[i:])
 905                 if fmtc == '%' {
 906                         // %% acts like a real percent
 907                         nextc, _ := utf8.DecodeRuneInString(format[i+w:]) // will not match % if string is empty
 908                         if nextc != '%' {
 909                                 return
 910                         }
 911                         i += w // skip the first %
 912                 }
 913                 sawSpace := false
 914                 for unicode.IsSpace(fmtc) && i < len(format) {
 915                         sawSpace = true
 916                         i += w
 917                         fmtc, w = utf8.DecodeRuneInString(format[i:])
 918                 }
 919                 if sawSpace {
 920                         // There was space in the format, so there should be space (EOF)
 921                         // in the input.
 922                         inputc := s.getRune()
 923                         if inputc == EOF {
 924                                 return
 925                         }
 926                         if !unicode.IsSpace(inputc) {
 927                                 // Space in format but not in input: error
 928                                 s.errorString("expected space in input to match format")
 929                         }
 930                         s.skipSpace(true)
 931                         continue
 932                 }
 933                 inputc := s.mustGetRune()
 934                 if fmtc != inputc {
 935                         s.UngetRune()
 936                         return -1
 937                 }
 938                 i += w
 939         }
 940         return
 941 }
 942
 943 // doScanf does the real work when scanning with a format string.
 944 //  At the moment, it handles only pointers to basic types.
 945 func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.Error) {
 946         defer errorHandler(&err)
 947         end := len(format) - 1
 948         // We process one item per non-trivial format
 949         for i := 0; i <= end; {
 950                 w := s.advance(format[i:])
 951                 if w > 0 {
 952                         i += w
 953                         continue
 954                 }
 955                 // Either we failed to advance, we have a percent character, or we ran out of input.
 956                 if format[i] != '%' {
 957                         // Can't advance format.  Why not?
 958                         if w < 0 {
 959                                 s.errorString("input does not match format")
 960                         }
 961                         // Otherwise at EOF; "too many operands" error handled below
 962                         break
 963                 }
 964                 i++ // % is one byte
 965
 966                 // do we have 20 (width)?
 967                 s.maxWid, s.widPresent, i = parsenum(format, i, end)
 968
 969                 c, w := utf8.DecodeRuneInString(format[i:])
 970                 i += w
 971
 972                 if numProcessed >= len(a) { // out of operands
 973                         s.errorString("too few operands for format %" + format[i-w:])
 974                         break
 975                 }
 976                 field := a[numProcessed]
 977
 978                 s.scanOne(c, field)
 979                 numProcessed++
 980         }
 981         if numProcessed < len(a) {
 982                 s.errorString("too many operands")
 983         }
 984         return
 985 }