src/third_party/closure_linter/closure_linter/indentation.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2010 The Closure Linter Authors. All Rights Reserved.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #      http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS-IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 """Methods for checking EcmaScript files for indentation issues."""
  18
  19 __author__ = ('robbyw@google.com (Robert Walker)')
  20
  21 from closure_linter import ecmametadatapass
  22 from closure_linter import errors
  23 from closure_linter import javascripttokens
  24 from closure_linter import tokenutil
  25 from closure_linter.common import error
  26 from closure_linter.common import position
  27
  28 import gflags as flags
  29
# Registers the boolean flag 'debug_indentation' (default False).  CheckToken
# reads it as flags.FLAGS.debug_indentation and, when it is set, prints the
# indentation stack for each line it checks.
flags.DEFINE_boolean('debug_indentation', False,
                     'Whether to print debugging information for indentation.')


# Shorthand aliases for names from the imported modules that are used
# throughout this file.
Context = ecmametadatapass.EcmaContext
Error = error.Error
Position = position.Position
Type = javascripttokens.JavaScriptTokenType
  39
  40
  41 # The general approach:
  42 #
# 1. Build a stack of tokens that can affect indentation.
#    For each token, we determine if it is a block or continuation token.
#    Some tokens need to be temporarily overwritten in case they are removed
#    before the end of the line.
#    Much of the work here is determining which tokens to keep on the stack
#    at each point.  Operators, for example, should be removed once their
#    expression or line is gone, while a parenthesis must stay until its
#    matching end parenthesis is found.
  51 #
# 2. Given that stack, determine the allowable indentations.
#    Due to flexible indentation rules in JavaScript, there may be many
#    allowable indentations for each stack.  We follow the general
#    "no false positives" approach of GJsLint and build the most permissive
#    set possible.
  57
  58
  59 class TokenInfo(object):
  60   """Stores information about a token.
  61
  62   Attributes:
  63     token: The token
  64     is_block: Whether the token represents a block indentation.
  65     is_transient: Whether the token should be automatically removed without
  66       finding a matching end token.
  67     overridden_by: TokenInfo for a token that overrides the indentation that
  68       this token would require.
  69     is_permanent_override: Whether the override on this token should persist
  70       even after the overriding token is removed from the stack.  For example:
  71       x([
  72         1],
  73       2);
  74       needs this to be set so the last line is not required to be a continuation
  75       indent.
  76     line_number: The effective line number of this token.  Will either be the
  77       actual line number or the one before it in the case of a mis-wrapped
  78       operator.
  79   """
  80
  81   def __init__(self, token, is_block=False):
  82     """Initializes a TokenInfo object.
  83
  84     Args:
  85       token: The token
  86       is_block: Whether the token represents a block indentation.
  87     """
  88     self.token = token
  89     self.overridden_by = None
  90     self.is_permanent_override = False
  91     self.is_block = is_block
  92     self.is_transient = not is_block and not token.type in (
  93         Type.START_PAREN, Type.START_PARAMETERS)
  94     self.line_number = token.line_number
  95
  96   def __repr__(self):
  97     result = '\n  %s' % self.token
  98     if self.overridden_by:
  99       result = '%s OVERRIDDEN [by "%s"]' % (
 100           result, self.overridden_by.token.string)
 101     result += ' {is_block: %s, is_transient: %s}' % (
 102         self.is_block, self.is_transient)
 103     return result
 104
 105
 106 class IndentationRules(object):
  """EcmaScript indentation rules.
 108
 109   Can be used to find common indentation errors in JavaScript, ActionScript and
 110   other Ecma like scripting languages.
 111   """
 112
 113   def __init__(self):
 114     """Initializes the IndentationRules checker."""
 115     self._stack = []
 116
 117     # Map from line number to number of characters it is off in indentation.
 118     self._start_index_offset = {}
 119
 120   def Finalize(self):
 121     if self._stack:
 122       old_stack = self._stack
 123       self._stack = []
 124       raise Exception("INTERNAL ERROR: indentation stack is not empty: %r" %
 125                       old_stack)
 126
 127   def CheckToken(self, token, state):
 128     """Checks a token for indentation errors.
 129
 130     Args:
 131       token: The current token under consideration
 132       state: Additional information about the current tree state
 133
 134     Returns:
 135       An error array [error code, error string, error token] if the token is
 136       improperly indented, or None if indentation is correct.
 137     """
 138
 139     token_type = token.type
 140     indentation_errors = []
 141     stack = self._stack
 142     is_first = self._IsFirstNonWhitespaceTokenInLine(token)
 143
 144     # Add tokens that could decrease indentation before checking.
 145     if token_type == Type.END_PAREN:
 146       self._PopTo(Type.START_PAREN)
 147
 148     elif token_type == Type.END_PARAMETERS:
 149       self._PopTo(Type.START_PARAMETERS)
 150
 151     elif token_type == Type.END_BRACKET:
 152       self._PopTo(Type.START_BRACKET)
 153
 154     elif token_type == Type.END_BLOCK:
 155       start_token = self._PopTo(Type.START_BLOCK)
 156       # Check for required goog.scope comment.
 157       if start_token:
 158         goog_scope = self._GoogScopeOrNone(start_token.token)
 159         if goog_scope is not None:
 160           if not token.line.endswith(';  // goog.scope\n'):
 161             if (token.line.find('//') > -1 and
 162                 token.line.find('goog.scope') >
 163                 token.line.find('//')):
 164               indentation_errors.append([
 165                   errors.MALFORMED_END_OF_SCOPE_COMMENT,
 166                   ('Malformed end of goog.scope comment. Please use the '
 167                    'exact following syntax to close the scope:\n'
 168                    '});  // goog.scope'),
 169                   token,
 170                   Position(token.start_index, token.length)])
 171             else:
 172               indentation_errors.append([
 173                   errors.MISSING_END_OF_SCOPE_COMMENT,
 174                   ('Missing comment for end of goog.scope which opened at line '
 175                    '%d. End the scope with:\n'
 176                    '});  // goog.scope' %
 177                    (start_token.line_number)),
 178                   token,
 179                   Position(token.start_index, token.length)])
 180
 181     elif token_type == Type.KEYWORD and token.string in ('case', 'default'):
 182       self._Add(self._PopTo(Type.START_BLOCK))
 183
 184     elif is_first and token.string == '.':
 185       # This token should have been on the previous line, so treat it as if it
 186       # was there.
 187       info = TokenInfo(token)
 188       info.line_number = token.line_number - 1
 189       self._Add(info)
 190
 191     elif token_type == Type.SEMICOLON:
 192       self._PopTransient()
 193
 194     not_binary_operator = (token_type != Type.OPERATOR or
 195                            token.metadata.IsUnaryOperator())
 196     not_dot = token.string != '.'
 197     if is_first and not_binary_operator and not_dot and token.type not in (
 198         Type.COMMENT, Type.DOC_PREFIX, Type.STRING_TEXT):
 199       if flags.FLAGS.debug_indentation:
 200         print 'Line #%d: stack %r' % (token.line_number, stack)
 201
 202       # Ignore lines that start in JsDoc since we don't check them properly yet.
 203       # TODO(robbyw): Support checking JsDoc indentation.
 204       # Ignore lines that start as multi-line strings since indentation is N/A.
 205       # Ignore lines that start with operators since we report that already.
 206       # Ignore lines with tabs since we report that already.
 207       expected = self._GetAllowableIndentations()
 208       actual = self._GetActualIndentation(token)
 209
 210       # Special case comments describing else, case, and default.  Allow them
 211       # to outdent to the parent block.
 212       if token_type in Type.COMMENT_TYPES:
 213         next_code = tokenutil.SearchExcept(token, Type.NON_CODE_TYPES)
 214         if next_code and next_code.type == Type.END_BLOCK:
 215           next_code = tokenutil.SearchExcept(next_code, Type.NON_CODE_TYPES)
 216         if next_code and next_code.string in ('else', 'case', 'default'):
 217           # TODO(robbyw): This almost certainly introduces false negatives.
 218           expected |= self._AddToEach(expected, -2)
 219
 220       if actual >= 0 and actual not in expected:
 221         expected = sorted(expected)
 222         indentation_errors.append([
 223             errors.WRONG_INDENTATION,
 224             'Wrong indentation: expected any of {%s} but got %d' % (
 225                 ', '.join(
 226                     ['%d' % x for x in expected]), actual),
 227             token,
 228             Position(actual, expected[0])])
 229         self._start_index_offset[token.line_number] = expected[0] - actual
 230
 231     # Add tokens that could increase indentation.
 232     if token_type == Type.START_BRACKET:
 233       self._Add(TokenInfo(token=token,
 234           is_block=token.metadata.context.type == Context.ARRAY_LITERAL))
 235
 236     elif token_type == Type.START_BLOCK or token.metadata.is_implied_block:
 237       self._Add(TokenInfo(token=token, is_block=True))
 238
 239     elif token_type in (Type.START_PAREN, Type.START_PARAMETERS):
 240       self._Add(TokenInfo(token=token, is_block=False))
 241
 242     elif token_type == Type.KEYWORD and token.string == 'return':
 243       self._Add(TokenInfo(token))
 244
 245     elif not token.IsLastInLine() and (
 246         token.IsAssignment() or token.IsOperator('?')):
 247       self._Add(TokenInfo(token=token))
 248
 249     # Handle implied block closes.
 250     if token.metadata.is_implied_block_close:
 251       self._PopToImpliedBlock()
 252
 253     # Add some tokens only if they appear at the end of the line.
 254     is_last = self._IsLastCodeInLine(token)
 255     if is_last:
 256       if token_type == Type.OPERATOR:
 257         if token.string == ':':
 258           if (stack and stack[-1].token.string == '?'):
 259             # When a ternary : is on a different line than its '?', it doesn't
 260             # add indentation.
 261             if (token.line_number == stack[-1].token.line_number):
 262               self._Add(TokenInfo(token))
 263           elif token.metadata.context.type == Context.CASE_BLOCK:
 264             # Pop transient tokens from say, line continuations, e.g.,
 265             # case x.
 266             #     y:
 267             # Want to pop the transient 4 space continuation indent.
 268             self._PopTransient()
 269             # Starting the body of the case statement, which is a type of
 270             # block.
 271             self._Add(TokenInfo(token=token, is_block=True))
 272           elif token.metadata.context.type == Context.LITERAL_ELEMENT:
 273             # When in an object literal, acts as operator indicating line
 274             # continuations.
 275             self._Add(TokenInfo(token))
 276             pass
 277           else:
 278             # ':' might also be a statement label, no effect on indentation in
 279             # this case.
 280             pass
 281
 282         elif token.string != ',':
 283           self._Add(TokenInfo(token))
 284         else:
 285           # The token is a comma.
 286           if token.metadata.context.type == Context.VAR:
 287             self._Add(TokenInfo(token))
 288           elif token.metadata.context.type != Context.PARAMETERS:
 289             self._PopTransient()
 290
 291       elif (token.string.endswith('.')
 292             and token_type in (Type.IDENTIFIER, Type.NORMAL)):
 293         self._Add(TokenInfo(token))
 294       elif token_type == Type.PARAMETERS and token.string.endswith(','):
 295         # Parameter lists.
 296         self._Add(TokenInfo(token))
 297       elif token.metadata.is_implied_semicolon:
 298         self._PopTransient()
 299     elif token.IsAssignment():
 300       self._Add(TokenInfo(token))
 301
 302     return indentation_errors
 303
 304   def _AddToEach(self, original, amount):
 305     """Returns a new set with the given amount added to each element.
 306
 307     Args:
 308       original: The original set of numbers
 309       amount: The amount to add to each element
 310
 311     Returns:
 312       A new set containing each element of the original set added to the amount.
 313     """
 314     return set([x + amount for x in original])
 315
  # Token types that _IsHardStop treats as hard stops: the opening paren,
  # parameter-list and bracket tokens.
  _HARD_STOP_TYPES = (Type.START_PAREN, Type.START_PARAMETERS,
                      Type.START_BRACKET)

  # Token strings that _IsHardStop treats as hard stops regardless of the
  # token's type.
  _HARD_STOP_STRINGS = ('return', '?')
 320
 321   def _IsHardStop(self, token):
 322     """Determines if the given token can have a hard stop after it.
 323
 324     Hard stops are indentations defined by the position of another token as in
 325     indentation lined up with return, (, [, and ?.
 326     """
 327     return (token.type in self._HARD_STOP_TYPES or
 328             token.string in self._HARD_STOP_STRINGS or
 329             token.IsAssignment())
 330
 331   def _GetAllowableIndentations(self):
 332     """Computes the set of allowable indentations.
 333
 334     Returns:
 335       The set of allowable indentations, given the current stack.
 336     """
 337     expected = set([0])
 338     hard_stops = set([])
 339
 340     # Whether the tokens are still in the same continuation, meaning additional
 341     # indentation is optional.  As an example:
 342     # x = 5 +
 343     #     6 +
 344     #     7;
 345     # The second '+' does not add any required indentation.
 346     in_same_continuation = False
 347
 348     for token_info in self._stack:
 349       token = token_info.token
 350
 351       # Handle normal additive indentation tokens.
 352       if not token_info.overridden_by and token.string != 'return':
 353         if token_info.is_block:
 354           expected = self._AddToEach(expected, 2)
 355           hard_stops = self._AddToEach(hard_stops, 2)
 356           in_same_continuation = False
 357         elif in_same_continuation:
 358           expected |= self._AddToEach(expected, 4)
 359           hard_stops |= self._AddToEach(hard_stops, 4)
 360         else:
 361           expected = self._AddToEach(expected, 4)
 362           hard_stops |= self._AddToEach(hard_stops, 4)
 363           in_same_continuation = True
 364
 365       # Handle hard stops after (, [, return, =, and ?
 366       if self._IsHardStop(token):
 367         override_is_hard_stop = (token_info.overridden_by and
 368             self._IsHardStop(token_info.overridden_by.token))
 369         if not override_is_hard_stop:
 370           start_index = token.start_index
 371           if token.line_number in self._start_index_offset:
 372             start_index += self._start_index_offset[token.line_number]
 373           if (token.type in (Type.START_PAREN, Type.START_PARAMETERS) and
 374               not token_info.overridden_by):
 375             hard_stops.add(start_index + 1)
 376
 377           elif token.string == 'return' and not token_info.overridden_by:
 378             hard_stops.add(start_index + 7)
 379
 380           elif (token.type == Type.START_BRACKET):
 381             hard_stops.add(start_index + 1)
 382
 383           elif token.IsAssignment():
 384             hard_stops.add(start_index + len(token.string) + 1)
 385
 386           elif token.IsOperator('?') and not token_info.overridden_by:
 387             hard_stops.add(start_index + 2)
 388
 389     return (expected | hard_stops) or set([0])
 390
 391   def _GetActualIndentation(self, token):
 392     """Gets the actual indentation of the line containing the given token.
 393
 394     Args:
 395       token: Any token on the line.
 396
 397     Returns:
 398       The actual indentation of the line containing the given token.  Returns
 399       -1 if this line should be ignored due to the presence of tabs.
 400     """
 401     # Move to the first token in the line
 402     token = tokenutil.GetFirstTokenInSameLine(token)
 403
 404     # If it is whitespace, it is the indentation.
 405     if token.type == Type.WHITESPACE:
 406       if token.string.find('\t') >= 0:
 407         return -1
 408       else:
 409         return len(token.string)
 410     elif token.type == Type.PARAMETERS:
 411       return len(token.string) - len(token.string.lstrip())
 412     else:
 413       return 0
 414
 415   def _IsFirstNonWhitespaceTokenInLine(self, token):
 416     """Determines if the given token is the first non-space token on its line.
 417
 418     Args:
 419       token: The token.
 420
 421     Returns:
 422       True if the token is the first non-whitespace token on its line.
 423     """
 424     if token.type in (Type.WHITESPACE, Type.BLANK_LINE):
 425       return False
 426     if token.IsFirstInLine():
 427       return True
 428     return (token.previous and token.previous.IsFirstInLine() and
 429             token.previous.type == Type.WHITESPACE)
 430
 431   def _IsLastCodeInLine(self, token):
 432     """Determines if the given token is the last code token on its line.
 433
 434     Args:
 435       token: The token.
 436
 437     Returns:
 438       True if the token is the last code token on its line.
 439     """
 440     if token.type in Type.NON_CODE_TYPES:
 441       return False
 442     start_token = token
 443     while True:
 444       token = token.next
 445       if not token or token.line_number != start_token.line_number:
 446         return True
 447       if token.type not in Type.NON_CODE_TYPES:
 448         return False
 449
 450   def _GoogScopeOrNone(self, token):
 451     """Determines if the given START_BLOCK is part of a goog.scope statement.
 452
 453     Args:
 454       token: A token of type START_BLOCK.
 455
 456     Returns:
 457       The goog.scope function call token, or None if such call doesn't exist.
 458     """
 459     # Search for a goog.scope statement, which will be 5 tokens before the
 460     # block. Illustration of the tokens found prior to the start block:
 461     # goog.scope(function() {
 462     #      5    4    3   21 ^
 463
 464     maybe_goog_scope = token
 465     for unused_i in xrange(5):
 466       maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
 467                           maybe_goog_scope.previous else None)
 468     if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
 469       return maybe_goog_scope
 470
 471   def _Add(self, token_info):
 472     """Adds the given token info to the stack.
 473
 474     Args:
 475       token_info: The token information to add.
 476     """
 477     if self._stack and self._stack[-1].token == token_info.token:
 478       # Don't add the same token twice.
 479       return
 480
 481     if token_info.is_block or token_info.token.type == Type.START_PAREN:
 482       token_info.overridden_by = self._GoogScopeOrNone(token_info.token)
 483       index = 1
 484       while index <= len(self._stack):
 485         stack_info = self._stack[-index]
 486         stack_token = stack_info.token
 487
 488         if stack_info.line_number == token_info.line_number:
 489           # In general, tokens only override each other when they are on
 490           # the same line.
 491           stack_info.overridden_by = token_info
 492           if (token_info.token.type == Type.START_BLOCK and
 493               (stack_token.IsAssignment() or
 494                stack_token.type in (Type.IDENTIFIER, Type.START_PAREN))):
 495             # Multi-line blocks have lasting overrides, as in:
 496             # callFn({
 497             #   a: 10
 498             # },
 499             # 30);
 500             close_block = token_info.token.metadata.context.end_token
 501             stack_info.is_permanent_override = \
 502                 close_block.line_number != token_info.token.line_number
 503         elif (token_info.token.type == Type.START_BLOCK and
 504               token_info.token.metadata.context.type == Context.BLOCK and
 505               (stack_token.IsAssignment() or
 506                stack_token.type == Type.IDENTIFIER)):
 507           # When starting a function block, the override can transcend lines.
 508           # For example
 509           # long.long.name = function(
 510           #     a) {
 511           # In this case the { and the = are on different lines.  But the
 512           # override should still apply.
 513           stack_info.overridden_by = token_info
 514           stack_info.is_permanent_override = True
 515         else:
 516           break
 517         index += 1
 518
 519     self._stack.append(token_info)
 520
 521   def _Pop(self):
 522     """Pops the top token from the stack.
 523
 524     Returns:
 525       The popped token info.
 526     """
 527     token_info = self._stack.pop()
 528     if token_info.token.type not in (Type.START_BLOCK, Type.START_BRACKET):
 529       # Remove any temporary overrides.
 530       self._RemoveOverrides(token_info)
 531     else:
 532       # For braces and brackets, which can be object and array literals, remove
 533       # overrides when the literal is closed on the same line.
 534       token_check = token_info.token
 535       same_type = token_check.type
 536       goal_type = None
 537       if token_info.token.type == Type.START_BRACKET:
 538         goal_type = Type.END_BRACKET
 539       else:
 540         goal_type = Type.END_BLOCK
 541       line_number = token_info.token.line_number
 542       count = 0
 543       while token_check and token_check.line_number == line_number:
 544         if token_check.type == goal_type:
 545           count -= 1
 546           if not count:
 547             self._RemoveOverrides(token_info)
 548             break
 549         if token_check.type == same_type:
 550           count += 1
 551         token_check = token_check.next
 552     return token_info
 553
 554   def _PopToImpliedBlock(self):
 555     """Pops the stack until an implied block token is found."""
 556     while not self._Pop().token.metadata.is_implied_block:
 557       pass
 558
 559   def _PopTo(self, stop_type):
 560     """Pops the stack until a token of the given type is popped.
 561
 562     Args:
 563       stop_type: The type of token to pop to.
 564
 565     Returns:
 566       The token info of the given type that was popped.
 567     """
 568     last = None
 569     while True:
 570       last = self._Pop()
 571       if last.token.type == stop_type:
 572         break
 573     return last
 574
 575   def _RemoveOverrides(self, token_info):
 576     """Marks any token that was overridden by this token as active again.
 577
 578     Args:
 579       token_info: The token that is being removed from the stack.
 580     """
 581     for stack_token in self._stack:
 582       if (stack_token.overridden_by == token_info and
 583           not stack_token.is_permanent_override):
 584         stack_token.overridden_by = None
 585
 586   def _PopTransient(self):
 587     """Pops all transient tokens - i.e. not blocks, literals, or parens."""
 588     while self._stack and self._stack[-1].is_transient:
 589       self._Pop()