# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Metadata pass for annotating tokens in EcmaScript files."""

__author__ = ('robbyw@google.com (Robert Walker)')

from closure_linter import javascripttokens
from closure_linter import tokenutil


# Module-level shorthand for the JavaScript token type class; the rest of
# this file reads its constants (START_PAREN, OPERATOR, ...) via this alias.
TokenType = javascripttokens.JavaScriptTokenType


class ParseError(Exception):
    """Exception indicating a parse error at the given token.

    Attributes:
        token: The token where the parse error occurred.
    """

    def __init__(self, token, message=None):
        """Initialize a parse error at the given token with an optional message.

        Args:
            token: The token where the parse error occurred.
            message: A message describing the parse error.
        """
        Exception.__init__(self, message)
        # Keep the offending token so handlers can report where parsing failed.
        self.token = token


class EcmaContext(object):
    """Context object for EcmaScript languages.

    Contexts form a stack implemented as a linked list: each context points
    at its enclosing context through ``parent``.

    Attributes:
        type: The context type.
        start_token: The token where this context starts.
        end_token: The token where this context ends.
        parent: The parent context.
    """

    # The root context.
    ROOT = 'root'

    # A block of code.
    BLOCK = 'block'

    # A pseudo-block of code for a given case or default section.
    CASE_BLOCK = 'case_block'

    # Block of statements in a for loop's parentheses.
    FOR_GROUP_BLOCK = 'for_block'

    # An implied block of code for 1 line if, while, and for statements
    IMPLIED_BLOCK = 'implied_block'

    # An index in to an array or object.
    INDEX = 'index'

    # An array literal in [].
    ARRAY_LITERAL = 'array_literal'

    # An object literal in {}.
    OBJECT_LITERAL = 'object_literal'

    # An individual element in an array or object literal.
    LITERAL_ELEMENT = 'literal_element'

    # The portion of a ternary statement between ? and :
    TERNARY_TRUE = 'ternary_true'

    # The portion of a ternary statement after :
    TERNARY_FALSE = 'ternary_false'

    # The entire switch statement.  This will contain a GROUP with the
    # variable and a BLOCK with the code.
    #
    # Since that BLOCK is not a normal block, it can not contain statements
    # except for case and default.
    SWITCH = 'switch'

    # A normal comment.
    COMMENT = 'comment'

    # A JsDoc comment.
    DOC = 'doc'

    # An individual statement.
    STATEMENT = 'statement'

    # Code within parentheses.
    GROUP = 'group'

    # Parameter names in a function declaration.
    PARAMETERS = 'parameters'

    # A set of variable declarations appearing after the 'var' keyword.
    VAR = 'var'

    # Context types that are blocks.
    BLOCK_TYPES = frozenset([
        ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])

    def __init__(self, type, start_token, parent):
        """Initializes the context object.

        Args:
            type: The context type.
            start_token: The token where this context starts.
            parent: The parent context.
        """
        self.type = type
        self.start_token = start_token
        # Unknown until the context is closed; filled in by whoever pops it.
        self.end_token = None
        self.parent = parent

    def __repr__(self):
        """Returns a string representation of the context object.

        The context types are listed innermost first, joined by ' > ',
        e.g. 'Context(statement > root)'.
        """
        stack = []
        context = self
        while context is not None:
            stack.append(context.type)
            context = context.parent
        return 'Context(%s)' % ' > '.join(stack)


class EcmaMetaData(object):
    """Token metadata for EcmaScript languages.

    Attributes:
        last_code: The last code token to appear before this one.
        context: The context this token appears in.
        operator_type: The operator type, will be one of the *_OPERATOR
            constants defined below.
        is_implied_semicolon: True if an implied semicolon follows the token.
        is_implied_block: True if the token starts an implied block.
        is_implied_block_close: True if the token closes an implied block.
    """

    UNARY_OPERATOR = 'unary'

    UNARY_POST_OPERATOR = 'unary_post'

    BINARY_OPERATOR = 'binary'

    TERNARY_OPERATOR = 'ternary'

    def __init__(self):
        """Initializes a token metadata object."""
        self.last_code = None
        self.context = None
        self.operator_type = None
        self.is_implied_semicolon = False
        self.is_implied_block = False
        self.is_implied_block_close = False

    def __repr__(self):
        """Returns a string representation of the metadata object."""
        # repr() rather than '%r' % x so a tuple-valued context cannot be
        # mistaken for a tuple of format arguments.
        parts = [repr(self.context)]
        if self.operator_type:
            parts.append('optype: %r' % (self.operator_type,))
        if self.is_implied_semicolon:
            parts.append('implied;')
        return 'MetaData(%s)' % ', '.join(parts)

    def IsUnaryOperator(self):
        """Returns whether the token is a prefix or postfix unary operator."""
        return self.operator_type in (EcmaMetaData.UNARY_OPERATOR,
                                      EcmaMetaData.UNARY_POST_OPERATOR)

    def IsUnaryPostOperator(self):
        """Returns whether the token is a postfix unary operator."""
        return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR


class EcmaMetaDataPass(object):
    """A pass that iterates over all tokens and builds metadata about them.

    The pass follows each token's ``next`` link, keeps a stack of contexts
    (linked through their ``parent`` attribute) and assigns a metadata object
    to the ``metadata`` attribute of every token it visits.
    """

    def __init__(self):
        """Initialize the meta data pass object."""
        self.Reset()

    def Reset(self):
        """Resets the metadata pass to prepare for the next file."""
        # _token and _context must exist before _AddContext runs, because
        # _CreateContext reads both of them.
        self._token = None
        self._context = None
        self._AddContext(EcmaContext.ROOT)
        self._last_code = None

    def _CreateContext(self, type):
        """Overridable by subclasses to create the appropriate context type.

        Args:
            type: The type of context to create.

        Returns:
            A new context starting at the current token, whose parent is the
            current context.
        """
        return EcmaContext(type, self._token, self._context)

    def _CreateMetaData(self):
        """Overridable by subclasses to create the appropriate metadata type."""
        return EcmaMetaData()

    def _AddContext(self, type):
        """Adds a context of the given type to the context stack.

        Args:
            type: The type of context to create
        """
        self._context = self._CreateContext(type)

    def _PopContext(self):
        """Moves up one level in the context stack.

        The popped context's end_token is set to the current token.

        Returns:
            The former top context.

        Raises:
            ParseError: If the root context is popped.
        """
        top_context = self._context
        top_context.end_token = self._token
        self._context = top_context.parent
        if self._context:
            return top_context
        # The root was just popped, so there is no context left to return to.
        raise ParseError(self._token)

    def _PopContextType(self, *stop_types):
        """Pops the context stack until a context of the given type is popped.

        Args:
            stop_types: The types of context to pop to - stops at the first
                match.

        Returns:
            The context object of the given type that was popped.

        Raises:
            ParseError: If the root context is popped before a match is found.
        """
        last = None
        while not last or last.type not in stop_types:
            last = self._PopContext()
        return last

    def _EndStatement(self):
        """Process the end of a statement."""
        self._PopContextType(EcmaContext.STATEMENT)
        if self._context.type == EcmaContext.IMPLIED_BLOCK:
            # An implied block holds exactly one statement, so the end of
            # that statement closes the block as well.
            self._token.metadata.is_implied_block_close = True
            self._PopContext()

    def _ProcessContext(self):
        """Process the context at the current token.

        Returns:
            The context that should be assigned to the current token, or None
            if the current context after this method should be used.

        Raises:
            ParseError: When the token appears in an invalid context.
        """
        token = self._token
        token_type = token.type

        if self._context.type in EcmaContext.BLOCK_TYPES:
            # Whenever we're in a block, we add a statement context.  We make
            # an exception for switch statements since they can only contain
            # case: and default: and therefore don't directly contain
            # statements.
            # The block we add here may be immediately removed in some cases,
            # but that causes no harm.
            parent = self._context.parent
            if not parent or parent.type != EcmaContext.SWITCH:
                self._AddContext(EcmaContext.STATEMENT)

        elif self._context.type == EcmaContext.ARRAY_LITERAL:
            self._AddContext(EcmaContext.LITERAL_ELEMENT)

        if token_type == TokenType.START_PAREN:
            if self._last_code and self._last_code.IsKeyword('for'):
                # for loops contain multiple statements in the group unlike
                # while, switch, if, etc.
                self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
            else:
                self._AddContext(EcmaContext.GROUP)

        elif token_type == TokenType.END_PAREN:
            result = self._PopContextType(EcmaContext.GROUP,
                                          EcmaContext.FOR_GROUP_BLOCK)
            keyword_token = result.start_token.metadata.last_code
            # keyword_token will not exist if the open paren is the first
            # line of the file, for example if all code is wrapped in an
            # immediately executed anonymous function.
            if keyword_token and keyword_token.string in ('if', 'for', 'while'):
                next_code = tokenutil.SearchExcept(
                    token, TokenType.NON_CODE_TYPES)
                if next_code.type != TokenType.START_BLOCK:
                    # Check for do-while.
                    is_do_while = False
                    pre_keyword_token = keyword_token.metadata.last_code
                    if (pre_keyword_token and
                            pre_keyword_token.type == TokenType.END_BLOCK):
                        start_block_token = (
                            pre_keyword_token.metadata.context.start_token)
                        do_token = start_block_token.metadata.last_code
                        is_do_while = do_token.string == 'do'

                    # If it's not do-while, it's an implied block.
                    if not is_do_while:
                        self._AddContext(EcmaContext.IMPLIED_BLOCK)
                        token.metadata.is_implied_block = True

            return result

        # else (not else if) with no open brace after it should be considered
        # the start of an implied block, similar to the case with if, for, and
        # while above.
        elif (token_type == TokenType.KEYWORD and
              token.string == 'else'):
            next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
            if (next_code.type != TokenType.START_BLOCK and
                    (next_code.type != TokenType.KEYWORD or
                     next_code.string != 'if')):
                self._AddContext(EcmaContext.IMPLIED_BLOCK)
                token.metadata.is_implied_block = True

        elif token_type == TokenType.START_PARAMETERS:
            self._AddContext(EcmaContext.PARAMETERS)

        elif token_type == TokenType.END_PARAMETERS:
            return self._PopContextType(EcmaContext.PARAMETERS)

        elif token_type == TokenType.START_BRACKET:
            if (self._last_code and
                    self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
                self._AddContext(EcmaContext.INDEX)
            else:
                self._AddContext(EcmaContext.ARRAY_LITERAL)

        elif token_type == TokenType.END_BRACKET:
            return self._PopContextType(EcmaContext.INDEX,
                                        EcmaContext.ARRAY_LITERAL)

        elif token_type == TokenType.START_BLOCK:
            if (self._last_code.type in (TokenType.END_PAREN,
                                         TokenType.END_PARAMETERS) or
                    self._last_code.IsKeyword('else') or
                    self._last_code.IsKeyword('do') or
                    self._last_code.IsKeyword('try') or
                    self._last_code.IsKeyword('finally') or
                    (self._last_code.IsOperator(':') and
                     self._last_code.metadata.context.type ==
                     EcmaContext.CASE_BLOCK)):
                # else, do, try, and finally all might have no () before {.
                # Also, handle the bizarre syntax case 10: {...}.
                self._AddContext(EcmaContext.BLOCK)
            else:
                self._AddContext(EcmaContext.OBJECT_LITERAL)

        elif token_type == TokenType.END_BLOCK:
            context = self._PopContextType(EcmaContext.BLOCK,
                                           EcmaContext.OBJECT_LITERAL)
            if self._context.type == EcmaContext.SWITCH:
                # The end of the block also means the end of the switch
                # statement it applies to.
                return self._PopContext()
            return context

        elif token.IsKeyword('switch'):
            self._AddContext(EcmaContext.SWITCH)

        elif (token_type == TokenType.KEYWORD and
              token.string in ('case', 'default')):
            # Pop up to but not including the switch block.
            while self._context.parent.type != EcmaContext.SWITCH:
                self._PopContext()
                if self._context.parent is None:
                    # Reached the root without finding a switch.  Report it
                    # as a parse error instead of failing on None.type.
                    raise ParseError(
                        token,
                        'Encountered case/default statement without switch '
                        'statement')

        elif token.IsOperator('?'):
            self._AddContext(EcmaContext.TERNARY_TRUE)

        elif token.IsOperator(':'):
            if self._context.type == EcmaContext.OBJECT_LITERAL:
                self._AddContext(EcmaContext.LITERAL_ELEMENT)

            elif self._context.type == EcmaContext.TERNARY_TRUE:
                self._PopContext()
                self._AddContext(EcmaContext.TERNARY_FALSE)

            # Handle nested ternary statements like:
            # foo = bar ? baz ? 1 : 2 : 3
            # When we encounter the second ":" the context is
            # ternary_false > ternary_true > statement > root
            elif (self._context.type == EcmaContext.TERNARY_FALSE and
                  self._context.parent.type == EcmaContext.TERNARY_TRUE):
                self._PopContext()  # Leave current ternary false context.
                self._PopContext()  # Leave current parent ternary true
                self._AddContext(EcmaContext.TERNARY_FALSE)

            elif self._context.parent.type == EcmaContext.SWITCH:
                self._AddContext(EcmaContext.CASE_BLOCK)

        elif token.IsKeyword('var'):
            self._AddContext(EcmaContext.VAR)

        elif token.IsOperator(','):
            while self._context.type not in (EcmaContext.VAR,
                                             EcmaContext.ARRAY_LITERAL,
                                             EcmaContext.OBJECT_LITERAL,
                                             EcmaContext.STATEMENT,
                                             EcmaContext.PARAMETERS,
                                             EcmaContext.GROUP):
                self._PopContext()

        elif token_type == TokenType.SEMICOLON:
            self._EndStatement()

    def Process(self, first_token):
        """Processes the token stream starting with the given token.

        Args:
            first_token: The first token of the stream; following tokens are
                reached through each token's ``next`` attribute.
        """
        self._token = first_token
        while self._token:
            self._ProcessToken()

            if self._token.IsCode():
                self._last_code = self._token

            self._token = self._token.next

        # Close every context that is still open.  Popping the root always
        # raises ParseError, which is expected here.
        try:
            self._PopContextType(EcmaContext.ROOT)
        except ParseError:
            # Ignore the "popped to root" error.
            pass

    def _ProcessToken(self):
        """Process the given token."""
        token = self._token
        token.metadata = self._CreateMetaData()
        context = (self._ProcessContext() or self._context)
        token.metadata.context = context
        token.metadata.last_code = self._last_code

        # Determine the operator type of the token, if applicable.
        if token.type == TokenType.OPERATOR:
            token.metadata.operator_type = self._GetOperatorType(token)

        # Determine if there is an implied semicolon after the token.
        if token.type != TokenType.SEMICOLON:
            next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
            # A statement like if (x) does not need a semicolon after it
            is_implied_block = (
                self._context.type == EcmaContext.IMPLIED_BLOCK)
            is_last_code_in_line = token.IsCode() and (
                not next_code or next_code.line_number != token.line_number)
            is_continued_identifier = (token.type == TokenType.IDENTIFIER and
                                       token.string.endswith('.'))
            is_continued_operator = (token.type == TokenType.OPERATOR and
                                     not token.metadata.IsUnaryPostOperator())
            is_continued_dot = token.string == '.'
            next_code_is_operator = (next_code and
                                     next_code.type == TokenType.OPERATOR)
            next_code_is_dot = next_code and next_code.string == '.'
            is_end_of_block = (
                token.type == TokenType.END_BLOCK and
                token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
            is_multiline_string = token.type == TokenType.STRING_TEXT
            next_code_is_block = (next_code and
                                  next_code.type == TokenType.START_BLOCK)
            if (is_last_code_in_line and
                    self._StatementCouldEndInContext() and
                    not is_multiline_string and
                    not is_end_of_block and
                    not is_continued_identifier and
                    not is_continued_operator and
                    not is_continued_dot and
                    not next_code_is_dot and
                    not next_code_is_operator and
                    not is_implied_block and
                    not next_code_is_block):
                token.metadata.is_implied_semicolon = True
                self._EndStatement()

    def _StatementCouldEndInContext(self):
        """Returns whether the current statement may end in this context."""
        # In the basic statement or variable declaration context, statement
        # can always end in this context.
        if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
            return True

        # End of a ternary false branch inside a statement can also be the
        # end of the statement, for example:
        # var x = foo ? foo.bar() : null
        # In this case the statement ends after the null, when the context
        # stack looks like ternary_false > var > statement > root.
        if (self._context.type == EcmaContext.TERNARY_FALSE and
                self._context.parent.type in (EcmaContext.STATEMENT,
                                              EcmaContext.VAR)):
            return True

        # In all other contexts like object and array literals, ternary true,
        # etc. the statement can't yet end.
        return False

    def _GetOperatorType(self, token):
        """Returns the operator type of the given operator token.

        Args:
            token: The token to get arity for.

        Returns:
            The type of the operator.  One of the *_OPERATOR constants defined
            in EcmaMetaData.
        """
        if token.string == '?':
            return EcmaMetaData.TERNARY_OPERATOR

        if token.string in TokenType.UNARY_OPERATORS:
            return EcmaMetaData.UNARY_OPERATOR

        # With no preceding code, or right after the end of a block, there is
        # no left operand, so the operator is treated as unary.
        last_code = token.metadata.last_code
        if not last_code or last_code.type == TokenType.END_BLOCK:
            return EcmaMetaData.UNARY_OPERATOR

        if (token.string in TokenType.UNARY_POST_OPERATORS and
                last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
            return EcmaMetaData.UNARY_POST_OPERATOR

        if (token.string in TokenType.UNARY_OK_OPERATORS and
                last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
                last_code.string not in TokenType.UNARY_POST_OPERATORS):
            return EcmaMetaData.UNARY_OPERATOR

        return EcmaMetaData.BINARY_OPERATOR