vendor/github.com/BurntSushi/toml/parse.go

   1 package toml
   2
   3 import (
   4         "fmt"
   5         "log"
   6         "strconv"
   7         "strings"
   8         "time"
   9         "unicode"
  10         "unicode/utf8"
  11 )
  12
  13 type parser struct {
  14         mapping map[string]interface{}
  15         types   map[string]tomlType
  16         lx      *lexer
  17
  18         // A list of keys in the order that they appear in the TOML data.
  19         ordered []Key
  20
  21         // the full key for the current hash in scope
  22         context Key
  23
  24         // the base key name for everything except hashes
  25         currentKey string
  26
  27         // rough approximation of line number
  28         approxLine int
  29
  30         // A map of 'key.group.names' to whether they were created implicitly.
  31         implicits map[string]bool
  32 }
  33
  34 type parseError string
  35
  36 func (pe parseError) Error() string {
  37         return string(pe)
  38 }
  39
  40 func parse(data string) (p *parser, err error) {
  41         defer func() {
  42                 if r := recover(); r != nil {
  43                         var ok bool
  44                         if err, ok = r.(parseError); ok {
  45                                 return
  46                         }
  47                         panic(r)
  48                 }
  49         }()
  50
  51         p = &parser{
  52                 mapping:   make(map[string]interface{}),
  53                 types:     make(map[string]tomlType),
  54                 lx:        lex(data),
  55                 ordered:   make([]Key, 0),
  56                 implicits: make(map[string]bool),
  57         }
  58         for {
  59                 item := p.next()
  60                 if item.typ == itemEOF {
  61                         break
  62                 }
  63                 p.topLevel(item)
  64         }
  65
  66         return p, nil
  67 }
  68
  69 func (p *parser) panicf(format string, v ...interface{}) {
  70         msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
  71                 p.approxLine, p.current(), fmt.Sprintf(format, v...))
  72         panic(parseError(msg))
  73 }
  74
  75 func (p *parser) next() item {
  76         it := p.lx.nextItem()
  77         if it.typ == itemError {
  78                 p.panicf("%s", it.val)
  79         }
  80         return it
  81 }
  82
  83 func (p *parser) bug(format string, v ...interface{}) {
  84         log.Fatalf("BUG: %s\n\n", fmt.Sprintf(format, v...))
  85 }
  86
  87 func (p *parser) expect(typ itemType) item {
  88         it := p.next()
  89         p.assertEqual(typ, it.typ)
  90         return it
  91 }
  92
  93 func (p *parser) assertEqual(expected, got itemType) {
  94         if expected != got {
  95                 p.bug("Expected '%s' but got '%s'.", expected, got)
  96         }
  97 }
  98
  99 func (p *parser) topLevel(item item) {
 100         switch item.typ {
 101         case itemCommentStart:
 102                 p.approxLine = item.line
 103                 p.expect(itemText)
 104         case itemTableStart:
 105                 kg := p.next()
 106                 p.approxLine = kg.line
 107
 108                 var key Key
 109                 for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
 110                         key = append(key, p.keyString(kg))
 111                 }
 112                 p.assertEqual(itemTableEnd, kg.typ)
 113
 114                 p.establishContext(key, false)
 115                 p.setType("", tomlHash)
 116                 p.ordered = append(p.ordered, key)
 117         case itemArrayTableStart:
 118                 kg := p.next()
 119                 p.approxLine = kg.line
 120
 121                 var key Key
 122                 for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
 123                         key = append(key, p.keyString(kg))
 124                 }
 125                 p.assertEqual(itemArrayTableEnd, kg.typ)
 126
 127                 p.establishContext(key, true)
 128                 p.setType("", tomlArrayHash)
 129                 p.ordered = append(p.ordered, key)
 130         case itemKeyStart:
 131                 kname := p.next()
 132                 p.approxLine = kname.line
 133                 p.currentKey = p.keyString(kname)
 134
 135                 val, typ := p.value(p.next())
 136                 p.setValue(p.currentKey, val)
 137                 p.setType(p.currentKey, typ)
 138                 p.ordered = append(p.ordered, p.context.add(p.currentKey))
 139                 p.currentKey = ""
 140         default:
 141                 p.bug("Unexpected type at top level: %s", item.typ)
 142         }
 143 }
 144
 145 // Gets a string for a key (or part of a key in a table name).
 146 func (p *parser) keyString(it item) string {
 147         switch it.typ {
 148         case itemText:
 149                 return it.val
 150         case itemString, itemMultilineString,
 151                 itemRawString, itemRawMultilineString:
 152                 s, _ := p.value(it)
 153                 return s.(string)
 154         default:
 155                 p.bug("Unexpected key type: %s", it.typ)
 156                 panic("unreachable")
 157         }
 158 }
 159
 160 // value translates an expected value from the lexer into a Go value wrapped
 161 // as an empty interface.
 162 func (p *parser) value(it item) (interface{}, tomlType) {
 163         switch it.typ {
 164         case itemString:
 165                 return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
 166         case itemMultilineString:
 167                 trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
 168                 return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
 169         case itemRawString:
 170                 return it.val, p.typeOfPrimitive(it)
 171         case itemRawMultilineString:
 172                 return stripFirstNewline(it.val), p.typeOfPrimitive(it)
 173         case itemBool:
 174                 switch it.val {
 175                 case "true":
 176                         return true, p.typeOfPrimitive(it)
 177                 case "false":
 178                         return false, p.typeOfPrimitive(it)
 179                 }
 180                 p.bug("Expected boolean value, but got '%s'.", it.val)
 181         case itemInteger:
 182                 num, err := strconv.ParseInt(it.val, 10, 64)
 183                 if err != nil {
 184                         // See comment below for floats describing why we make a
 185                         // distinction between a bug and a user error.
 186                         if e, ok := err.(*strconv.NumError); ok &&
 187                                 e.Err == strconv.ErrRange {
 188
 189                                 p.panicf("Integer '%s' is out of the range of 64-bit "+
 190                                         "signed integers.", it.val)
 191                         } else {
 192                                 p.bug("Expected integer value, but got '%s'.", it.val)
 193                         }
 194                 }
 195                 return num, p.typeOfPrimitive(it)
 196         case itemFloat:
 197                 num, err := strconv.ParseFloat(it.val, 64)
 198                 if err != nil {
 199                         // Distinguish float values. Normally, it'd be a bug if the lexer
 200                         // provides an invalid float, but it's possible that the float is
 201                         // out of range of valid values (which the lexer cannot determine).
 202                         // So mark the former as a bug but the latter as a legitimate user
 203                         // error.
 204                         //
 205                         // This is also true for integers.
 206                         if e, ok := err.(*strconv.NumError); ok &&
 207                                 e.Err == strconv.ErrRange {
 208
 209                                 p.panicf("Float '%s' is out of the range of 64-bit "+
 210                                         "IEEE-754 floating-point numbers.", it.val)
 211                         } else {
 212                                 p.bug("Expected float value, but got '%s'.", it.val)
 213                         }
 214                 }
 215                 return num, p.typeOfPrimitive(it)
 216         case itemDatetime:
 217                 t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
 218                 if err != nil {
 219                         p.bug("Expected Zulu formatted DateTime, but got '%s'.", it.val)
 220                 }
 221                 return t, p.typeOfPrimitive(it)
 222         case itemArray:
 223                 array := make([]interface{}, 0)
 224                 types := make([]tomlType, 0)
 225
 226                 for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
 227                         if it.typ == itemCommentStart {
 228                                 p.expect(itemText)
 229                                 continue
 230                         }
 231
 232                         val, typ := p.value(it)
 233                         array = append(array, val)
 234                         types = append(types, typ)
 235                 }
 236                 return array, p.typeOfArray(types)
 237         }
 238         p.bug("Unexpected value type: %s", it.typ)
 239         panic("unreachable")
 240 }
 241
 242 // establishContext sets the current context of the parser,
 243 // where the context is either a hash or an array of hashes. Which one is
 244 // set depends on the value of the `array` parameter.
 245 //
 246 // Establishing the context also makes sure that the key isn't a duplicate, and
 247 // will create implicit hashes automatically.
 248 func (p *parser) establishContext(key Key, array bool) {
 249         var ok bool
 250
 251         // Always start at the top level and drill down for our context.
 252         hashContext := p.mapping
 253         keyContext := make(Key, 0)
 254
 255         // We only need implicit hashes for key[0:-1]
 256         for _, k := range key[0 : len(key)-1] {
 257                 _, ok = hashContext[k]
 258                 keyContext = append(keyContext, k)
 259
 260                 // No key? Make an implicit hash and move on.
 261                 if !ok {
 262                         p.addImplicit(keyContext)
 263                         hashContext[k] = make(map[string]interface{})
 264                 }
 265
 266                 // If the hash context is actually an array of tables, then set
 267                 // the hash context to the last element in that array.
 268                 //
 269                 // Otherwise, it better be a table, since this MUST be a key group (by
 270                 // virtue of it not being the last element in a key).
 271                 switch t := hashContext[k].(type) {
 272                 case []map[string]interface{}:
 273                         hashContext = t[len(t)-1]
 274                 case map[string]interface{}:
 275                         hashContext = t
 276                 default:
 277                         p.panicf("Key '%s' was already created as a hash.", keyContext)
 278                 }
 279         }
 280
 281         p.context = keyContext
 282         if array {
 283                 // If this is the first element for this array, then allocate a new
 284                 // list of tables for it.
 285                 k := key[len(key)-1]
 286                 if _, ok := hashContext[k]; !ok {
 287                         hashContext[k] = make([]map[string]interface{}, 0, 5)
 288                 }
 289
 290                 // Add a new table. But make sure the key hasn't already been used
 291                 // for something else.
 292                 if hash, ok := hashContext[k].([]map[string]interface{}); ok {
 293                         hashContext[k] = append(hash, make(map[string]interface{}))
 294                 } else {
 295                         p.panicf("Key '%s' was already created and cannot be used as "+
 296                                 "an array.", keyContext)
 297                 }
 298         } else {
 299                 p.setValue(key[len(key)-1], make(map[string]interface{}))
 300         }
 301         p.context = append(p.context, key[len(key)-1])
 302 }
 303
 304 // setValue sets the given key to the given value in the current context.
 305 // It will make sure that the key hasn't already been defined, account for
 306 // implicit key groups.
 307 func (p *parser) setValue(key string, value interface{}) {
 308         var tmpHash interface{}
 309         var ok bool
 310
 311         hash := p.mapping
 312         keyContext := make(Key, 0)
 313         for _, k := range p.context {
 314                 keyContext = append(keyContext, k)
 315                 if tmpHash, ok = hash[k]; !ok {
 316                         p.bug("Context for key '%s' has not been established.", keyContext)
 317                 }
 318                 switch t := tmpHash.(type) {
 319                 case []map[string]interface{}:
 320                         // The context is a table of hashes. Pick the most recent table
 321                         // defined as the current hash.
 322                         hash = t[len(t)-1]
 323                 case map[string]interface{}:
 324                         hash = t
 325                 default:
 326                         p.bug("Expected hash to have type 'map[string]interface{}', but "+
 327                                 "it has '%T' instead.", tmpHash)
 328                 }
 329         }
 330         keyContext = append(keyContext, key)
 331
 332         if _, ok := hash[key]; ok {
 333                 // Typically, if the given key has already been set, then we have
 334                 // to raise an error since duplicate keys are disallowed. However,
 335                 // it's possible that a key was previously defined implicitly. In this
 336                 // case, it is allowed to be redefined concretely. (See the
 337                 // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
 338                 //
 339                 // But we have to make sure to stop marking it as an implicit. (So that
 340                 // another redefinition provokes an error.)
 341                 //
 342                 // Note that since it has already been defined (as a hash), we don't
 343                 // want to overwrite it. So our business is done.
 344                 if p.isImplicit(keyContext) {
 345                         p.removeImplicit(keyContext)
 346                         return
 347                 }
 348
 349                 // Otherwise, we have a concrete key trying to override a previous
 350                 // key, which is *always* wrong.
 351                 p.panicf("Key '%s' has already been defined.", keyContext)
 352         }
 353         hash[key] = value
 354 }
 355
 356 // setType sets the type of a particular value at a given key.
 357 // It should be called immediately AFTER setValue.
 358 //
 359 // Note that if `key` is empty, then the type given will be applied to the
 360 // current context (which is either a table or an array of tables).
 361 func (p *parser) setType(key string, typ tomlType) {
 362         keyContext := make(Key, 0, len(p.context)+1)
 363         for _, k := range p.context {
 364                 keyContext = append(keyContext, k)
 365         }
 366         if len(key) > 0 { // allow type setting for hashes
 367                 keyContext = append(keyContext, key)
 368         }
 369         p.types[keyContext.String()] = typ
 370 }
 371
 372 // addImplicit sets the given Key as having been created implicitly.
 373 func (p *parser) addImplicit(key Key) {
 374         p.implicits[key.String()] = true
 375 }
 376
 377 // removeImplicit stops tagging the given key as having been implicitly
 378 // created.
 379 func (p *parser) removeImplicit(key Key) {
 380         p.implicits[key.String()] = false
 381 }
 382
 383 // isImplicit returns true if the key group pointed to by the key was created
 384 // implicitly.
 385 func (p *parser) isImplicit(key Key) bool {
 386         return p.implicits[key.String()]
 387 }
 388
 389 // current returns the full key name of the current context.
 390 func (p *parser) current() string {
 391         if len(p.currentKey) == 0 {
 392                 return p.context.String()
 393         }
 394         if len(p.context) == 0 {
 395                 return p.currentKey
 396         }
 397         return fmt.Sprintf("%s.%s", p.context, p.currentKey)
 398 }
 399
 400 func stripFirstNewline(s string) string {
 401         if len(s) == 0 || s[0] != '\n' {
 402                 return s
 403         }
 404         return s[1:len(s)]
 405 }
 406
 407 func stripEscapedWhitespace(s string) string {
 408         esc := strings.Split(s, "\\\n")
 409         if len(esc) > 1 {
 410                 for i := 1; i < len(esc); i++ {
 411                         esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
 412                 }
 413         }
 414         return strings.Join(esc, "")
 415 }
 416
 417 func (p *parser) replaceEscapes(str string) string {
 418         var replaced []rune
 419         s := []byte(str)
 420         r := 0
 421         for r < len(s) {
 422                 if s[r] != '\\' {
 423                         c, size := utf8.DecodeRune(s[r:])
 424                         r += size
 425                         replaced = append(replaced, c)
 426                         continue
 427                 }
 428                 r += 1
 429                 if r >= len(s) {
 430                         p.bug("Escape sequence at end of string.")
 431                         return ""
 432                 }
 433                 switch s[r] {
 434                 default:
 435                         p.bug("Expected valid escape code after \\, but got %q.", s[r])
 436                         return ""
 437                 case 'b':
 438                         replaced = append(replaced, rune(0x0008))
 439                         r += 1
 440                 case 't':
 441                         replaced = append(replaced, rune(0x0009))
 442                         r += 1
 443                 case 'n':
 444                         replaced = append(replaced, rune(0x000A))
 445                         r += 1
 446                 case 'f':
 447                         replaced = append(replaced, rune(0x000C))
 448                         r += 1
 449                 case 'r':
 450                         replaced = append(replaced, rune(0x000D))
 451                         r += 1
 452                 case '"':
 453                         replaced = append(replaced, rune(0x0022))
 454                         r += 1
 455                 case '\\':
 456                         replaced = append(replaced, rune(0x005C))
 457                         r += 1
 458                 case 'u':
 459                         // At this point, we know we have a Unicode escape of the form
 460                         // `uXXXX` at [r, r+5). (Because the lexer guarantees this
 461                         // for us.)
 462                         escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
 463                         replaced = append(replaced, escaped)
 464                         r += 5
 465                 case 'U':
 466                         // At this point, we know we have a Unicode escape of the form
 467                         // `uXXXX` at [r, r+9). (Because the lexer guarantees this
 468                         // for us.)
 469                         escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
 470                         replaced = append(replaced, escaped)
 471                         r += 9
 472                 }
 473         }
 474         return string(replaced)
 475 }
 476
 477 func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
 478         s := string(bs)
 479         hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
 480         if err != nil {
 481                 p.bug("Could not parse '%s' as a hexadecimal number, but the "+
 482                         "lexer claims it's OK: %s", s, err)
 483         }
 484
 485         // BUG(burntsushi)
 486         // I honestly don't understand how this works. I can't seem
 487         // to find a way to make this fail. I figured this would fail on invalid
 488         // UTF-8 characters like U+DCFF, but it doesn't.
 489         if !utf8.ValidString(string(rune(hex))) {
 490                 p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
 491         }
 492         return rune(hex)
 493 }
 494
 495 func isStringType(ty itemType) bool {
 496         return ty == itemString || ty == itemMultilineString ||
 497                 ty == itemRawString || ty == itemRawMultilineString
 498 }