3 # Copyright 2010 The Closure Linter Authors. All Rights Reserved.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS-IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 """Methods for checking EcmaScript files for indentation issues."""
19 __author__ = ('robbyw@google.com (Robert Walker)')
21 from closure_linter import ecmametadatapass
22 from closure_linter import errors
23 from closure_linter import javascripttokens
24 from closure_linter import tokenutil
25 from closure_linter.common import error
26 from closure_linter.common import position
28 import gflags as flags
# Boolean command-line flag, default False.  CheckToken reads it as
# flags.FLAGS.debug_indentation to decide whether to print the stack.
30 flags.DEFINE_boolean('debug_indentation', False,
31 'Whether to print debugging information for indentation.')
# Short module-level aliases for names used throughout this file.
35 Context = ecmametadatapass.EcmaContext
37 Position = position.Position
38 Type = javascripttokens.JavaScriptTokenType
41 # The general approach:
43 # 1. Build a stack of tokens that can affect indentation.
44 # For each token, we determine if it is a block or continuation token.
45 # Some tokens need to be temporarily overwritten in case they are removed
46 # before the end of the line.
47 # Much of the work here is determining which tokens to keep on the stack
48 # at each point. Operators, for example, should be removed once their
49 # expression or line is gone, while parentheses must stay until the matching
50 # end parentheses is found.
52 # 2. Given that stack, determine the allowable indentations.
53 # Due to flexible indentation rules in JavaScript, there may be many
54 # allowable indentations for each stack. We follow the general
55 # "no false positives" approach of GJsLint and build the most permissive
class TokenInfo(object):
    """Stores information about a token kept on the indentation stack.

    Attributes:
      token: The token this record describes.
      is_block: Whether the token represents a block indentation.
      is_transient: Whether the token should be automatically removed without
          finding a matching end token.
      overridden_by: TokenInfo for a token that overrides the indentation that
          this token would require.
      is_permanent_override: Whether the override on this token should persist
          even after the overriding token is removed from the stack, so that a
          later line is not required to be a continuation indent.
      line_number: The effective line number of this token.  Will either be
          the actual line number or the one before it in the case of a
          mis-wrapped token (for example a '.' that starts a line).
    """

    def __init__(self, token, is_block=False):
        """Initializes a TokenInfo object.

        Args:
          token: The token.
          is_block: Whether the token represents a block indentation.
        """
        self.token = token
        self.overridden_by = None
        self.is_permanent_override = False
        self.is_block = is_block
        # Blocks, parens and parameter lists are not transient; every other
        # token is.
        self.is_transient = not is_block and token.type not in (
            Type.START_PAREN, Type.START_PARAMETERS)
        self.line_number = token.line_number

    def __repr__(self):
        """Returns a debugging description of this stack entry."""
        result = '\n %s' % self.token
        if self.overridden_by:
            result = '%s OVERRIDDEN [by "%s"]' % (
                result, self.overridden_by.token.string)
        result += ' {is_block: %s, is_transient: %s}' % (
            self.is_block, self.is_transient)
        return result
106 class IndentationRules(object):
107 """EcmaScript indentation rules.
109 Can be used to find common indentation errors in JavaScript, ActionScript and
110 other Ecma like scripting languages.
114 """Initializes the IndentationRules checker."""
117 # Map from line number to number of characters it is off in indentation.
118 self._start_index_offset = {}
# NOTE(review): fragment of a method whose `def` line is not visible in this
# extract.  The visible lines take a reference to the current stack and raise
# an internal error whose message says the indentation stack is not empty.
# The guarding condition and the remainder of the raise expression are
# missing here - confirm against the full file.
122 old_stack = self._stack
124 raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
# CheckToken is the per-token entry point of the indentation checker.  It
# works in three phases: (1) pop stack entries that this token closes,
# (2) if the token starts a line, compare the line's actual indentation with
# the allowable set, (3) push entries for tokens that can increase
# indentation.
# NOTE(review): this extract has lost its leading indentation and a number of
# lines (the embedded line numbers skip), so some branch bodies and `else:`
# lines are not visible below.
127 def CheckToken(self, token, state):
128 """Checks a token for indentation errors.
131 token: The current token under consideration
132 state: Additional information about the current tree state
135 An error array [error code, error string, error token] if the token is
136 improperly indented, or None if indentation is correct.
139 token_type = token.type
140 indentation_errors = []
142 is_first = self._IsFirstNonWhitespaceTokenInLine(token)
144 # Add tokens that could decrease indentation before checking.
145 if token_type == Type.END_PAREN:
146 self._PopTo(Type.START_PAREN)
148 elif token_type == Type.END_PARAMETERS:
149 self._PopTo(Type.START_PARAMETERS)
151 elif token_type == Type.END_BRACKET:
152 self._PopTo(Type.START_BRACKET)
154 elif token_type == Type.END_BLOCK:
# start_token is the TokenInfo that _PopTo returns for the opening brace.
155 start_token = self._PopTo(Type.START_BLOCK)
156 # Check for required goog.scope comment.
158 goog_scope = self._GoogScopeOrNone(start_token.token)
159 if goog_scope is not None:
160 if not token.line.endswith('; // goog.scope\n'):
# A '//' comment that mentions goog.scope after the '//' exists, but not in
# the exact required form: report it as malformed.
161 if (token.line.find('//') > -1 and
162 token.line.find('goog.scope') >
163 token.line.find('//')):
164 indentation_errors.append([
165 errors.MALFORMED_END_OF_SCOPE_COMMENT,
166 ('Malformed end of goog.scope comment. Please use the '
167 'exact following syntax to close the scope:\n'
168 '}); // goog.scope'),
170 Position(token.start_index, token.length)])
# NOTE(review): the line introducing this second report (presumably an
# `else:`) is not visible in this extract.
172 indentation_errors.append([
173 errors.MISSING_END_OF_SCOPE_COMMENT,
174 ('Missing comment for end of goog.scope which opened at line '
175 '%d. End the scope with:\n'
176 '}); // goog.scope' %
177 (start_token.line_number)),
179 Position(token.start_index, token.length)])
181 elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
# Pop down to the enclosing START_BLOCK entry and push that entry back.
182 self._Add(self._PopTo(Type.START_BLOCK))
184 elif is_first and token.string == '.':
185 # This token should have been on the previous line, so treat it as if it
187 info = TokenInfo(token)
188 info.line_number = token.line_number - 1
191 elif token_type == Type.SEMICOLON:
# NOTE(review): the body of the SEMICOLON branch is not visible here.
# Only lines whose first token is not a binary operator, not '.', and not a
# COMMENT / DOC_PREFIX / STRING_TEXT token have their indentation checked.
194 not_binary_operator = (token_type != Type.OPERATOR or
195 token.metadata.IsUnaryOperator())
196 not_dot = token.string != '.'
197 if is_first and not_binary_operator and not_dot and token.type not in (
198 Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
199 if flags.FLAGS.debug_indentation:
# NOTE(review): `stack` is not assigned in the visible lines; presumably a
# local alias of self._stack - confirm.  This is a Python 2 print statement.
200 print 'Line #%d: stack %r' % (token.line_number, stack)
202 # Ignore lines that start in JsDoc since we don't check them properly yet.
203 # TODO(robbyw): Support checking JsDoc indentation.
204 # Ignore lines that start as multi-line strings since indentation is N/A.
205 # Ignore lines that start with operators since we report that already.
206 # Ignore lines with tabs since we report that already.
207 expected = self._GetAllowableIndentations()
208 actual = self._GetActualIndentation(token)
210 # Special case comments describing else, case, and default. Allow them
211 # to outdent to the parent block.
212 if token_type in Type.COMMENT_TYPES:
213 next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
214 if next_code and next_code.type == Type.END_BLOCK:
215 next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
216 if next_code and next_code.string in ('else', 'case', 'default'):
217 # TODO(robbyw): This almost certainly introduces false negatives.
# Additionally accept every allowable indentation reduced by 2.
218 expected |= self._AddToEach(expected, -2)
# _GetActualIndentation documents -1 for lines containing tabs, so the
# `actual >= 0` test skips those lines.
220 if actual >= 0 and actual not in expected:
221 expected = sorted(expected)
222 indentation_errors.append([
223 errors.WRONG_INDENTATION,
224 'Wrong indentation: expected any of {%s} but got %d' % (
226 ['%d' % x for x in expected]), actual),
228 Position(actual, expected[0])])
# Record how far off this line is; _GetAllowableIndentations adds this offset
# to the start index of hard-stop tokens found on this line.
229 self._start_index_offset[token.line_number] = expected[0] - actual
231 # Add tokens that could increase indentation.
232 if token_type == Type.START_BRACKET:
# A bracket counts as a block only when its context is an array literal.
233 self._Add(TokenInfo(token=token,
234 is_block=token.metadata.context.type == Context.ARRAY_LITERAL))
236 elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
237 self._Add(TokenInfo(token=token, is_block=True))
239 elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
240 self._Add(TokenInfo(token=token, is_block=False))
242 elif token_type == Type.KEYWORD and token.string == 'return':
243 self._Add(TokenInfo(token))
245 elif not token.IsLastInLine() and (
246 token.IsAssignment() or token.IsOperator('?')):
247 self._Add(TokenInfo(token=token))
249 # Handle implied block closes.
250 if token.metadata.is_implied_block_close:
251 self._PopToImpliedBlock()
253 # Add some tokens only if they appear at the end of the line.
# NOTE(review): the conditional that consumes is_last is not visible in this
# extract.
254 is_last = self._IsLastCodeInLine(token)
256 if token_type == Type.OPERATOR:
257 if token.string == ':':
258 if (stack and stack[-1].token.string == '?'):
259 # When a ternary : is on a different line than its '?', it doesn't
261 if (token.line_number == stack[-1].token.line_number):
262 self._Add(TokenInfo(token))
263 elif token.metadata.context.type == Context.CASE_BLOCK:
264 # Pop transient tokens from say, line continuations, e.g.,
267 # Want to pop the transient 4 space continuation indent.
269 # Starting the body of the case statement, which is a type of
271 self._Add(TokenInfo(token=token, is_block=True))
272 elif token.metadata.context.type == Context.LITERAL_ELEMENT:
273 # When in an object literal, acts as operator indicating line
275 self._Add(TokenInfo(token))
278 # ':' might also be a statement label, no effect on indentation in
282 elif token.string != ',':
283 self._Add(TokenInfo(token))
285 # The token is a comma.
286 if token.metadata.context.type == Context.VAR:
287 self._Add(TokenInfo(token))
288 elif token.metadata.context.type != Context.PARAMETERS:
# NOTE(review): the body of this branch is not visible here.
291 elif (token.string.endswith('.')
292 and token_type in (Type.IDENTIFIER, Type.NORMAL)):
293 self._Add(TokenInfo(token))
294 elif token_type == Type.PARAMETERS and token.string.endswith(','):
296 self._Add(TokenInfo(token))
297 elif token.metadata.is_implied_semicolon:
# NOTE(review): the body of this branch is not visible here.
299 elif token.IsAssignment():
300 self._Add(TokenInfo(token))
# The value returned is the list built above (possibly empty), not None as
# the docstring states.
302 return indentation_errors
304 def _AddToEach(self, original, amount):
305 """Returns a new set with the given amount added to each element.
308 original: The original set of numbers
309 amount: The amount to add to each element
312 A new set containing each element of the original set added to the amount.
314 return set([x + amount for x in original])
# Token types that _IsHardStop treats as hard stops.  START_BRACKET belongs
# here because _GetAllowableIndentations handles it inside its hard-stop
# branch.
_HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                    Type.START_BRACKET)

# Token strings that _IsHardStop treats as hard stops.
_HARD_STOP_STRINGS = ('return', '?')
321 def _IsHardStop(self, token):
322 """Determines if the given token can have a hard stop after it.
324 Hard stops are indentations defined by the position of another token as in
325 indentation lined up with return, (, [, and ?.
327 return (token.type in self._HARD_STOP_TYPES or
328 token.string in self._HARD_STOP_STRINGS or
329 token.IsAssignment())
# _GetAllowableIndentations walks self._stack from bottom to top and builds
# two sets: `expected` (indentations from additive block / continuation
# rules) and `hard_stops` (columns aligned with a specific earlier token).
# Their union is returned.
# NOTE(review): the lines that initialise `expected` and `hard_stops`, and at
# least one `else:` line, are not visible in this extract.
331 def _GetAllowableIndentations(self):
332 """Computes the set of allowable indentations.
335 The set of allowable indentations, given the current stack.
340 # Whether the tokens are still in the same continuation, meaning additional
341 # indentation is optional. As an example:
345 # The second '+' does not add any required indentation.
346 in_same_continuation = False
348 for token_info in self._stack:
349 token = token_info.token
351 # Handle normal additive indentation tokens.
# Overridden entries and 'return' contribute no additive indentation.
352 if not token_info.overridden_by and token.string != 'return':
353 if token_info.is_block:
# A block shifts every candidate by 2 and ends the current continuation.
354 expected = self._AddToEach(expected, 2)
355 hard_stops = self._AddToEach(hard_stops, 2)
356 in_same_continuation = False
357 elif in_same_continuation:
# Further tokens of the same continuation make an extra +4 optional: the
# shifted values are unioned in rather than replacing the set.
358 expected |= self._AddToEach(expected, 4)
359 hard_stops |= self._AddToEach(hard_stops, 4)
# NOTE(review): the `else:` introducing the next three lines is not visible.
# Here `expected` is replaced by its +4 shift, while hard_stops only gains
# the shifted values.
361 expected = self._AddToEach(expected, 4)
362 hard_stops |= self._AddToEach(hard_stops, 4)
363 in_same_continuation = True
365 # Handle hard stops after (, [, return, =, and ?
366 if self._IsHardStop(token):
367 override_is_hard_stop = (token_info.overridden_by and
368 self._IsHardStop(token_info.overridden_by.token))
369 if not override_is_hard_stop:
370 start_index = token.start_index
# Apply the offset CheckToken recorded for a mis-indented line.
371 if token.line_number in self._start_index_offset:
372 start_index += self._start_index_offset[token.line_number]
373 if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
374 not token_info.overridden_by):
375 hard_stops.add(start_index + 1)
377 elif token.string == 'return' and not token_info.overridden_by:
# 7 is presumably len('return') plus one following space - confirm.
378 hard_stops.add(start_index + 7)
380 elif (token.type == Type.START_BRACKET):
381 hard_stops.add(start_index + 1)
383 elif token.IsAssignment():
# Column just past the operator text plus one more character.
384 hard_stops.add(start_index + len(token.string) + 1)
386 elif token.IsOperator('?') and not token_info.overridden_by:
387 hard_stops.add(start_index + 2)
# Falls back to {0} when both sets are empty.
389 return (expected | hard_stops) or set([0])
def _GetActualIndentation(self, token):
    """Gets the actual indentation of the line containing the given token.

    Args:
      token: Any token on the line.

    Returns:
      The actual indentation of the line containing the given token.  Returns
      -1 if this line should be ignored due to the presence of tabs.
    """
    # Move to the first token in the line.
    first = tokenutil.GetFirstTokenInSameLine(token)

    # If it is whitespace, it is the indentation.
    if first.type == Type.WHITESPACE:
        if '\t' in first.string:
            return -1
        return len(first.string)

    # For a PARAMETERS token, count its own leading whitespace.
    if first.type == Type.PARAMETERS:
        return len(first.string) - len(first.string.lstrip())

    # Any other first token means the line has no indentation.
    return 0
def _IsFirstNonWhitespaceTokenInLine(self, token):
    """Determines if the given token is the first non-space token on its line.

    Args:
      token: The token to examine.

    Returns:
      A truthy value if the token is the first non-whitespace token on its
      line, a falsy value otherwise.
    """
    # Whitespace and blank-line tokens are never "non-whitespace" tokens.
    if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
        return False
    if token.IsFirstInLine():
        return True
    # Otherwise the token qualifies only when the sole thing before it on the
    # line is a leading whitespace token.
    return (token.previous and token.previous.IsFirstInLine() and
            token.previous.type == Type.WHITESPACE)
def _IsLastCodeInLine(self, token):
    """Determines if the given token is the last code token on its line.

    Args:
      token: The token to examine.

    Returns:
      True if the token is the last code token on its line.
    """
    if token.type in Type.NON_CODE_TYPES:
        return False

    start_token = token
    while True:
        token = token.next
        # Ran off the end of the file or onto the next line without meeting
        # another code token.
        if not token or token.line_number != start_token.line_number:
            return True
        # A later code token exists on the same line.
        if token.type not in Type.NON_CODE_TYPES:
            return False
450 def _GoogScopeOrNone(self, token):
451 """Determines if the given START_BLOCK is part of a goog.scope statement.
454 token: A token of type START_BLOCK.
457 The goog.scope function call token, or None if such call doesn't exist.
459 # Search for a goog.scope statement, which will be 5 tokens before the
460 # block. Illustration of the tokens found prior to the start block:
461 # goog.scope(function() {
464 maybe_goog_scope = token
465 for unused_i in xrange(5):
466 maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
467 maybe_goog_scope.previous else None)
468 if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
469 return maybe_goog_scope
# _Add pushes a TokenInfo onto self._stack.  For block entries and START_PAREN
# entries it first walks the existing stack from the top and marks entries
# as overridden by the new one.
# NOTE(review): several lines are not visible in this extract: the statement
# after the duplicate check, the initialisation and increment of `index`,
# and the loop exit.
471 def _Add(self, token_info):
472 """Adds the given token info to the stack.
475 token_info: The token information to add.
477 if self._stack and self._stack[-1].token == token_info.token:
478 # Don't add the same token twice.
481 if token_info.is_block or token_info.token.type == Type.START_PAREN:
# NOTE(review): _GoogScopeOrNone returns a token (or None), whereas the
# overridden_by values assigned below are TokenInfo objects.
482 token_info.overridden_by = self._GoogScopeOrNone(token_info.token)
484 while index <= len(self._stack):
# Negative indexing: -1 is the top of the stack.
485 stack_info = self._stack[-index]
486 stack_token = stack_info.token
# Compares effective line numbers (TokenInfo.line_number), which CheckToken
# may have set to the previous line for a leading '.'.
488 if stack_info.line_number == token_info.line_number:
489 # In general, tokens only override each other when they are on
491 stack_info.overridden_by = token_info
492 if (token_info.token.type == Type.START_BLOCK and
493 (stack_token.IsAssignment() or
494 stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
495 # Multi-line blocks have lasting overrides, as in:
# The override is permanent only when the block's end token is on a
# different line from the block's start.
500 close_block = token_info.token.metadata.context.end_token
501 stack_info.is_permanent_override = \
502 close_block.line_number != token_info.token.line_number
503 elif (token_info.token.type == Type.START_BLOCK and
504 token_info.token.metadata.context.type == Context.BLOCK and
505 (stack_token.IsAssignment() or
506 stack_token.type == Type.IDENTIFIER)):
507 # When starting a function block, the override can transcend lines.
509 # long.long.name = function(
511 # In this case the { and the = are on different lines. But the
512 # override should still apply.
513 stack_info.overridden_by = token_info
514 stack_info.is_permanent_override = True
519 self._stack.append(token_info)
# Body of the method that pops the top entry from self._stack (called as
# self._Pop() elsewhere in this file).
# NOTE(review): the `def` line, the loop-exit statements and the final
# return are not visible in this extract.
522 """Pops the top token from the stack.
525 The popped token info.
527 token_info = self._stack.pop()
# Entries other than START_BLOCK / START_BRACKET release their overrides
# right away.
528 if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
529 # Remove any temporary overrides.
530 self._RemoveOverrides(token_info)
532 # For braces and brackets, which can be object and array literals, remove
533 # overrides when the literal is closed on the same line.
534 token_check = token_info.token
535 same_type = token_check.type
537 if token_info.token.type == Type.START_BRACKET:
538 goal_type = Type.END_BRACKET
540 goal_type = Type.END_BLOCK
541 line_number = token_info.token.line_number
# Scan forward through the tokens on the opening token's line.
543 while token_check and token_check.line_number == line_number:
544 if token_check.type == goal_type:
547 self._RemoveOverrides(token_info)
# NOTE(review): presumably tracks nested openers of the same type; the lines
# inside these branches are not visible - confirm.
549 if token_check.type == same_type:
551 token_check = token_check.next
554 def _PopToImpliedBlock(self):
555 """Pops the stack until an implied block token is found."""
556 while not self._Pop().token.metadata.is_implied_block:
559 def _PopTo(self, stop_type):
560 """Pops the stack until a token of the given type is popped.
563 stop_type: The type of token to pop to.
566 The token info of the given type that was popped.
571 if last.token.type == stop_type:
575 def _RemoveOverrides(self, token_info):
576 """Marks any token that was overridden by this token as active again.
579 token_info: The token that is being removed from the stack.
581 for stack_token in self._stack:
582 if (stack_token.overridden_by == token_info and
583 not stack_token.is_permanent_override):
584 stack_token.overridden_by = None
586 def _PopTransient(self):
587 """Pops all transient tokens - i.e. not blocks, literals, or parens."""
588 while self._stack and self._stack[-1].is_transient: