1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 // item represents a token or text string returned from the scanner.
16 typ itemType // The type of this item.
17 pos Pos // The starting position, in bytes, of this item in the input string.
18 val string // The value of this item.
21 func (i item) String() string {
23 case i.typ == itemEOF:
25 case i.typ == itemError:
27 case i.typ > itemKeyword:
28 return fmt.Sprintf("<%s>", i.val)
30 return fmt.Sprintf("%.10q...", i.val)
32 return fmt.Sprintf("%q", i.val)
35 // itemType identifies the type of lex items.
39 itemError itemType = iota // error occurred; value is text of error
40 itemBool // boolean constant
41 itemChar // printable ASCII character; grab bag for comma etc.
42 itemCharConstant // character constant
43 itemComplex // complex constant (1+2i); imaginary is just a number
44 itemColonEquals // colon-equals (':=') introducing a declaration
46 itemField // alphanumeric identifier starting with '.'
47 itemIdentifier // alphanumeric identifier not starting with '.'
48 itemLeftDelim // left action delimiter
49 itemLeftParen // '(' inside action
50 itemNumber // simple number, including imaginary
51 itemPipe // pipe symbol
52 itemRawString // raw quoted string (includes quotes)
53 itemRightDelim // right action delimiter
54 itemRightParen // ')' inside action
55 itemSpace // run of spaces separating arguments
56 itemString // quoted string (includes quotes)
57 itemText // plain text
58 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello'
59 // Keywords appear after all the rest.
60 itemKeyword // used only to delimit the keywords
61 itemDot // the cursor, spelled '.'
62 itemDefine // define keyword
63 itemElse // else keyword
64 itemEnd // end keyword
66 itemNil // the untyped nil constant, easiest to treat as a keyword
67 itemRange // range keyword
68 itemTemplate // template keyword
69 itemWith // with keyword
72 var key = map[string]itemType{
80 "template": itemTemplate,
86 // stateFn represents the state of the scanner as a function that takes the lexer and returns the next state; a nil stateFn ends the lexer's run loop.
87 type stateFn func(*lexer) stateFn
89 // lexer holds the state of the scanner.
91 name string // the name of the input; used only for error reports
92 input string // the string being scanned
93 leftDelim string // start of action
94 rightDelim string // end of action
95 state stateFn // the next lexing function to enter
96 pos Pos // current position in the input
97 start Pos // start position of this item
98 width Pos // width of last rune read from input
99 lastPos Pos // position of most recent item returned by nextItem
100 items chan item // channel of scanned items
101 parenDepth int // nesting depth of ( ) exprs
104 // next returns the next rune in the input.
105 func (l *lexer) next() rune {
106 if int(l.pos) >= len(l.input) {
110 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
116 // peek returns but does not consume the next rune in the input.
117 func (l *lexer) peek() rune {
123 // backup steps back one rune. Can only be called once per call of next.
124 func (l *lexer) backup() {
128 // emit passes an item back to the client.
129 func (l *lexer) emit(t itemType) {
130 l.items <- item{t, l.start, l.input[l.start:l.pos]}
134 // ignore skips over the pending input before this point.
135 func (l *lexer) ignore() {
139 // accept consumes the next rune if it's from the valid set.
140 func (l *lexer) accept(valid string) bool {
141 if strings.IndexRune(valid, l.next()) >= 0 {
148 // acceptRun consumes a run of runes from the valid set.
149 func (l *lexer) acceptRun(valid string) {
150 for strings.IndexRune(valid, l.next()) >= 0 {
155 // lineNumber reports which line we're on, based on the position of
156 // the previous item returned by nextItem. Doing it this way
157 // means we don't have to worry about peek double counting.
158 func (l *lexer) lineNumber() int {
159 return 1 + strings.Count(l.input[:l.lastPos], "\n")
162 // errorf returns an error token and terminates the scan by passing
163 // back a nil pointer that will be the next state, terminating l.nextItem.
164 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
165 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
169 // nextItem returns the next item from the input.
170 func (l *lexer) nextItem() item {
176 // lex creates a new scanner for the input string.
177 func lex(name, input, left, right string) *lexer {
189 items: make(chan item),
195 // run runs the state machine for the lexer.
196 func (l *lexer) run() {
197 for l.state = lexText; l.state != nil; {
211 // lexText scans until an opening action delimiter (l.leftDelim, e.g. "{{").
212 func lexText(l *lexer) stateFn {
214 if strings.HasPrefix(l.input[l.pos:], l.leftDelim) {
224 // Correctly reached EOF.
232 // lexLeftDelim scans the left delimiter, which is known to be present.
233 func lexLeftDelim(l *lexer) stateFn {
234 l.pos += Pos(len(l.leftDelim))
235 if strings.HasPrefix(l.input[l.pos:], leftComment) {
238 l.emit(itemLeftDelim)
240 return lexInsideAction
243 // lexComment scans a comment. The left comment marker is known to be present.
244 func lexComment(l *lexer) stateFn {
245 l.pos += Pos(len(leftComment))
246 i := strings.Index(l.input[l.pos:], rightComment+l.rightDelim)
248 return l.errorf("unclosed comment")
250 l.pos += Pos(i + len(rightComment) + len(l.rightDelim))
255 // lexRightDelim scans the right delimiter, which is known to be present.
256 func lexRightDelim(l *lexer) stateFn {
257 l.pos += Pos(len(l.rightDelim))
258 l.emit(itemRightDelim)
262 // lexInsideAction scans the elements inside action delimiters.
263 func lexInsideAction(l *lexer) stateFn {
264 // Either number, quoted string, or identifier.
265 // Spaces separate arguments; runs of spaces turn into itemSpace.
266 // Pipe symbols separate and are emitted.
267 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
268 if l.parenDepth == 0 {
271 return l.errorf("unclosed left paren")
273 switch r := l.next(); {
274 case r == eof || isEndOfLine(r):
275 return l.errorf("unclosed action")
280 return l.errorf("expected :=")
282 l.emit(itemColonEquals)
294 // special look-ahead for ".field" so we don't break l.backup().
295 if l.pos < Pos(len(l.input)) {
297 if r < '0' || '9' < r {
301 fallthrough // '.' can start a number.
302 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
305 case isAlphaNumeric(r):
309 l.emit(itemLeftParen)
311 return lexInsideAction
313 l.emit(itemRightParen)
315 if l.parenDepth < 0 {
316 return l.errorf("unexpected right paren %#U", r)
318 return lexInsideAction
319 case r <= unicode.MaxASCII && unicode.IsPrint(r):
321 return lexInsideAction
323 return l.errorf("unrecognized character in action: %#U", r)
325 return lexInsideAction
328 // lexSpace scans a run of space characters.
329 // One space has already been seen.
330 func lexSpace(l *lexer) stateFn {
331 for isSpace(l.peek()) {
335 return lexInsideAction
338 // lexIdentifier scans an alphanumeric.
339 func lexIdentifier(l *lexer) stateFn {
342 switch r := l.next(); {
343 case isAlphaNumeric(r):
347 word := l.input[l.start:l.pos]
348 if !l.atTerminator() {
349 return l.errorf("bad character %#U", r)
352 case key[word] > itemKeyword:
356 case word == "true", word == "false":
359 l.emit(itemIdentifier)
364 return lexInsideAction
367 // lexField scans a field: .Alphanumeric.
368 // The . has been scanned.
369 func lexField(l *lexer) stateFn {
370 return lexFieldOrVariable(l, itemField)
373 // lexVariable scans a Variable: $Alphanumeric.
374 // The $ has been scanned.
375 func lexVariable(l *lexer) stateFn {
376 if l.atTerminator() { // Nothing interesting follows -> "$".
378 return lexInsideAction
380 return lexFieldOrVariable(l, itemVariable)
383 // lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
384 // The . or $ has been scanned.
385 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
386 if l.atTerminator() { // Nothing interesting follows -> "." or "$".
387 if typ == itemVariable {
392 return lexInsideAction
397 if !isAlphaNumeric(r) {
402 if !l.atTerminator() {
403 return l.errorf("bad character %#U", r)
406 return lexInsideAction
409 // atTerminator reports whether the input is at a valid termination character to
410 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
411 // like "$x+2" not being acceptable without a space, in case we decide one
412 // day to implement arithmetic.
413 func (l *lexer) atTerminator() bool {
415 if isSpace(r) || isEndOfLine(r) {
419 case eof, '.', ',', '|', ':', ')', '(':
422 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
423 // succeed but should fail) but only in extremely rare cases caused by willfully
424 // bad choice of delimiter.
425 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
431 // lexChar scans a character constant. The initial quote is already
432 // scanned. Syntax checking is done by the parser.
433 func lexChar(l *lexer) stateFn {
438 if r := l.next(); r != eof && r != '\n' {
443 return l.errorf("unterminated character constant")
448 l.emit(itemCharConstant)
449 return lexInsideAction
452 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
453 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
454 // and "089" - but when it's wrong the input is invalid and the parser (via
455 // strconv) will notice.
456 func lexNumber(l *lexer) stateFn {
458 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
460 if sign := l.peek(); sign == '+' || sign == '-' {
461 // Complex: 1+2i. No spaces, must end in 'i'.
462 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
463 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
469 return lexInsideAction
472 func (l *lexer) scanNumber() bool {
473 // Optional leading sign.
476 digits := "0123456789"
477 if l.accept("0") && l.accept("xX") {
478 digits = "0123456789abcdefABCDEF"
486 l.acceptRun("0123456789")
490 // Next thing mustn't be alphanumeric.
491 if isAlphaNumeric(l.peek()) {
498 // lexQuote scans a quoted string.
499 func lexQuote(l *lexer) stateFn {
504 if r := l.next(); r != eof && r != '\n' {
509 return l.errorf("unterminated quoted string")
515 return lexInsideAction
518 // lexRawQuote scans a raw quoted string.
519 func lexRawQuote(l *lexer) stateFn {
524 return l.errorf("unterminated raw quoted string")
529 l.emit(itemRawString)
530 return lexInsideAction
533 // isSpace reports whether r is a space character.
534 func isSpace(r rune) bool {
535 return r == ' ' || r == '\t'
538 // isEndOfLine reports whether r is an end-of-line character.
539 func isEndOfLine(r rune) bool {
540 return r == '\r' || r == '\n'
543 // isAlphaNumeric reports whether r is a letter, digit, or underscore.
544 func isAlphaNumeric(r rune) bool {
545 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)