src/third_party/closure_linter/closure_linter/closurizednamespacesinfo.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2008 The Closure Linter Authors. All Rights Reserved.
   4 #
   5 # Licensed under the Apache License, Version 2.0 (the "License");
   6 # you may not use this file except in compliance with the License.
   7 # You may obtain a copy of the License at
   8 #
   9 #      http://www.apache.org/licenses/LICENSE-2.0
  10 #
  11 # Unless required by applicable law or agreed to in writing, software
  12 # distributed under the License is distributed on an "AS-IS" BASIS,
  13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 # See the License for the specific language governing permissions and
  15 # limitations under the License.
  16
  17 """Logic for computing dependency information for closurized JavaScript files.
  18
  19 Closurized JavaScript files express dependencies using goog.require and
  20 goog.provide statements. In order for the linter to detect when a statement is
  21 missing or unnecessary, all identifiers in the JavaScript file must first be
  22 processed to determine if they constitute the creation or usage of a dependency.
  23 """
  24
  25
  26
  27 from closure_linter import javascripttokens
  28 from closure_linter import tokenutil
  29
# Module-level shorthand for the JavaScript token type enumeration so the
# hot-path code below can write TokenType.X instead of the fully qualified
# name. NOTE(review): the pragma presumably silences pylint's naming check for
# this CamelCase module-level alias -- confirm what C6409 maps to.
# pylint: disable-msg=C6409
TokenType = javascripttokens.JavaScriptTokenType
  32
  33 DEFAULT_EXTRA_NAMESPACES = [
  34   'goog.testing.asserts',
  35   'goog.testing.jsunit',
  36 ]
  37
  38 class ClosurizedNamespacesInfo(object):
  39   """Dependency information for closurized JavaScript files.
  40
  41   Processes token streams for dependency creation or usage and provides logic
  42   for determining if a given require or provide statement is unnecessary or if
  43   there are missing require or provide statements.
  44   """
  45
  46   def __init__(self, closurized_namespaces, ignored_extra_namespaces):
  47     """Initializes an instance the ClosurizedNamespacesInfo class.
  48
  49     Args:
  50       closurized_namespaces: A list of namespace prefixes that should be
  51           processed for dependency information. Non-matching namespaces are
  52           ignored.
  53       ignored_extra_namespaces: A list of namespaces that should not be reported
  54           as extra regardless of whether they are actually used.
  55     """
  56     self._closurized_namespaces = closurized_namespaces
  57     self._ignored_extra_namespaces = (ignored_extra_namespaces +
  58                                       DEFAULT_EXTRA_NAMESPACES)
  59     self.Reset()
  60
  61   def Reset(self):
  62     """Resets the internal state to prepare for processing a new file."""
  63
  64     # A list of goog.provide tokens in the order they appeared in the file.
  65     self._provide_tokens = []
  66
  67     # A list of goog.require tokens in the order they appeared in the file.
  68     self._require_tokens = []
  69
  70     # Namespaces that are already goog.provided.
  71     self._provided_namespaces = []
  72
  73     # Namespaces that are already goog.required.
  74     self._required_namespaces = []
  75
  76     # Note that created_namespaces and used_namespaces contain both namespaces
  77     # and identifiers because there are many existing cases where a method or
  78     # constant is provided directly instead of its namespace. Ideally, these
  79     # two lists would only have to contain namespaces.
  80
  81     # A list of tuples where the first element is the namespace of an identifier
  82     # created in the file and the second is the identifier itself.
  83     self._created_namespaces = []
  84
  85     # A list of tuples where the first element is the namespace of an identifier
  86     # used in the file and the second is the identifier itself.
  87     self._used_namespaces = []
  88
  89     # A list of seemingly-unnecessary namespaces that are goog.required() and
  90     # annotated with @suppress {extraRequire}.
  91     self._suppressed_requires = []
  92
  93     # A list of goog.provide tokens which are duplicates.
  94     self._duplicate_provide_tokens = []
  95
  96     # A list of goog.require tokens which are duplicates.
  97     self._duplicate_require_tokens = []
  98
  99     # Whether this file is in a goog.scope. Someday, we may add support
 100     # for checking scopified namespaces, but for now let's just fail
 101     # in a more reasonable way.
 102     self._scopified_file = False
 103
 104     # TODO(user): Handle the case where there are 2 different requires
 105     # that can satisfy the same dependency, but only one is necessary.
 106
 107   def GetProvidedNamespaces(self):
 108     """Returns the namespaces which are already provided by this file.
 109
 110     Returns:
 111       A list of strings where each string is a 'namespace' corresponding to an
 112       existing goog.provide statement in the file being checked.
 113     """
 114     return list(self._provided_namespaces)
 115
 116   def GetRequiredNamespaces(self):
 117     """Returns the namespaces which are already required by this file.
 118
 119     Returns:
 120       A list of strings where each string is a 'namespace' corresponding to an
 121       existing goog.require statement in the file being checked.
 122     """
 123     return list(self._required_namespaces)
 124
 125   def IsExtraProvide(self, token):
 126     """Returns whether the given goog.provide token is unnecessary.
 127
 128     Args:
 129       token: A goog.provide token.
 130
 131     Returns:
 132       True if the given token corresponds to an unnecessary goog.provide
 133       statement, otherwise False.
 134     """
 135     if self._scopified_file:
 136       return False
 137
 138     namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
 139
 140     base_namespace = namespace.split('.', 1)[0]
 141     if base_namespace not in self._closurized_namespaces:
 142       return False
 143
 144     if token in self._duplicate_provide_tokens:
 145       return True
 146
 147     # TODO(user): There's probably a faster way to compute this.
 148     for created_namespace, created_identifier in self._created_namespaces:
 149       if namespace == created_namespace or namespace == created_identifier:
 150         return False
 151
 152     return True
 153
 154   def IsExtraRequire(self, token):
 155     """Returns whether the given goog.require token is unnecessary.
 156
 157     Args:
 158       token: A goog.require token.
 159
 160     Returns:
 161       True if the given token corresponds to an unnecessary goog.require
 162       statement, otherwise False.
 163     """
 164     if self._scopified_file:
 165       return False
 166
 167     namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
 168
 169     base_namespace = namespace.split('.', 1)[0]
 170     if base_namespace not in self._closurized_namespaces:
 171       return False
 172
 173     if namespace in self._ignored_extra_namespaces:
 174       return False
 175
 176     if token in self._duplicate_require_tokens:
 177       return True
 178
 179     if namespace in self._suppressed_requires:
 180       return False
 181
 182     # If the namespace contains a component that is initial caps, then that
 183     # must be the last component of the namespace.
 184     parts = namespace.split('.')
 185     if len(parts) > 1 and parts[-2][0].isupper():
 186       return True
 187
 188     # TODO(user): There's probably a faster way to compute this.
 189     for used_namespace, used_identifier in self._used_namespaces:
 190       if namespace == used_namespace or namespace == used_identifier:
 191         return False
 192
 193     return True
 194
 195   def GetMissingProvides(self):
 196     """Returns the set of missing provided namespaces for the current file.
 197
 198     Returns:
 199       Returns a set of strings where each string is a namespace that should be
 200       provided by this file, but is not.
 201     """
 202     if self._scopified_file:
 203       return set()
 204
 205     missing_provides = set()
 206     for namespace, identifier in self._created_namespaces:
 207       if (not self._IsPrivateIdentifier(identifier) and
 208           namespace not in self._provided_namespaces and
 209           identifier not in self._provided_namespaces and
 210           namespace not in self._required_namespaces):
 211         missing_provides.add(namespace)
 212
 213     return missing_provides
 214
 215   def GetMissingRequires(self):
 216     """Returns the set of missing required namespaces for the current file.
 217
 218     For each non-private identifier used in the file, find either a
 219     goog.require, goog.provide or a created identifier that satisfies it.
 220     goog.require statements can satisfy the identifier by requiring either the
 221     namespace of the identifier or the identifier itself. goog.provide
 222     statements can satisfy the identifier by providing the namespace of the
 223     identifier. A created identifier can only satisfy the used identifier if
 224     it matches it exactly (necessary since things can be defined on a
 225     namespace in more than one file). Note that provided namespaces should be
 226     a subset of created namespaces, but we check both because in some cases we
 227     can't always detect the creation of the namespace.
 228
 229     Returns:
 230       Returns a set of strings where each string is a namespace that should be
 231       required by this file, but is not.
 232     """
 233     if self._scopified_file:
 234       return set()
 235
 236     external_dependencies = set(self._required_namespaces)
 237
 238     # Assume goog namespace is always available.
 239     external_dependencies.add('goog')
 240
 241     created_identifiers = set()
 242     for namespace, identifier in self._created_namespaces:
 243       created_identifiers.add(identifier)
 244
 245     missing_requires = set()
 246     for namespace, identifier in self._used_namespaces:
 247       if (not self._IsPrivateIdentifier(identifier) and
 248           namespace not in external_dependencies and
 249           namespace not in self._provided_namespaces and
 250           identifier not in external_dependencies and
 251           identifier not in created_identifiers):
 252         missing_requires.add(namespace)
 253
 254     return missing_requires
 255
 256   def _IsPrivateIdentifier(self, identifier):
 257     """Returns whether the given identifer is private."""
 258     pieces = identifier.split('.')
 259     for piece in pieces:
 260       if piece.endswith('_'):
 261         return True
 262     return False
 263
 264   def IsFirstProvide(self, token):
 265     """Returns whether token is the first provide token."""
 266     return self._provide_tokens and token == self._provide_tokens[0]
 267
 268   def IsFirstRequire(self, token):
 269     """Returns whether token is the first require token."""
 270     return self._require_tokens and token == self._require_tokens[0]
 271
 272   def IsLastProvide(self, token):
 273     """Returns whether token is the last provide token."""
 274     return self._provide_tokens and token == self._provide_tokens[-1]
 275
 276   def IsLastRequire(self, token):
 277     """Returns whether token is the last require token."""
 278     return self._require_tokens and token == self._require_tokens[-1]
 279
 280   def ProcessToken(self, token, state_tracker):
 281     """Processes the given token for dependency information.
 282
 283     Args:
 284       token: The token to process.
 285       state_tracker: The JavaScript state tracker.
 286     """
 287
 288     # Note that this method is in the critical path for the linter and has been
 289     # optimized for performance in the following ways:
 290     # - Tokens are checked by type first to minimize the number of function
 291     #   calls necessary to determine if action needs to be taken for the token.
 292     # - The most common tokens types are checked for first.
 293     # - The number of function calls has been minimized (thus the length of this
 294     #   function.
 295
 296     if token.type == TokenType.IDENTIFIER:
 297       # TODO(user): Consider saving the whole identifier in metadata.
 298       whole_identifier_string = self._GetWholeIdentifierString(token)
 299       if whole_identifier_string is None:
 300         # We only want to process the identifier one time. If the whole string
 301         # identifier is None, that means this token was part of a multi-token
 302         # identifier, but it was not the first token of the identifier.
 303         return
 304
 305       # In the odd case that a goog.require is encountered inside a function,
 306       # just ignore it (e.g. dynamic loading in test runners).
 307       if token.string == 'goog.require' and not state_tracker.InFunction():
 308         self._require_tokens.append(token)
 309         namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
 310         if namespace in self._required_namespaces:
 311           self._duplicate_require_tokens.append(token)
 312         else:
 313           self._required_namespaces.append(namespace)
 314
 315         # If there is a suppression for the require, add a usage for it so it
 316         # gets treated as a regular goog.require (i.e. still gets sorted).
 317         jsdoc = state_tracker.GetDocComment()
 318         if jsdoc and ('extraRequire' in jsdoc.suppressions):
 319           self._suppressed_requires.append(namespace)
 320           self._AddUsedNamespace(state_tracker, namespace)
 321
 322       elif token.string == 'goog.provide':
 323         self._provide_tokens.append(token)
 324         namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
 325         if namespace in self._provided_namespaces:
 326           self._duplicate_provide_tokens.append(token)
 327         else:
 328           self._provided_namespaces.append(namespace)
 329
 330         # If there is a suppression for the provide, add a creation for it so it
 331         # gets treated as a regular goog.provide (i.e. still gets sorted).
 332         jsdoc = state_tracker.GetDocComment()
 333         if jsdoc and ('extraProvide' in jsdoc.suppressions):
 334           self._AddCreatedNamespace(state_tracker, namespace)
 335
 336       elif token.string == 'goog.scope':
 337         self._scopified_file = True
 338
 339       else:
 340         jsdoc = state_tracker.GetDocComment()
 341         if jsdoc and jsdoc.HasFlag('typedef'):
 342           self._AddCreatedNamespace(state_tracker, whole_identifier_string,
 343                                     self.GetClosurizedNamespace(
 344                                         whole_identifier_string))
 345         else:
 346           self._AddUsedNamespace(state_tracker, whole_identifier_string)
 347
 348     elif token.type == TokenType.SIMPLE_LVALUE:
 349       identifier = token.values['identifier']
 350       namespace = self.GetClosurizedNamespace(identifier)
 351       if state_tracker.InFunction():
 352         self._AddUsedNamespace(state_tracker, identifier)
 353       elif namespace and namespace != 'goog':
 354         self._AddCreatedNamespace(state_tracker, identifier, namespace)
 355
 356     elif token.type == TokenType.DOC_FLAG:
 357       flag_type = token.attached_object.flag_type
 358       is_interface = state_tracker.GetDocComment().HasFlag('interface')
 359       if flag_type == 'implements' or (flag_type == 'extends' and is_interface):
 360         # Interfaces should be goog.require'd.
 361         doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
 362         interface = tokenutil.Search(doc_start, TokenType.COMMENT)
 363         self._AddUsedNamespace(state_tracker, interface.string)
 364
 365
 366   def _GetWholeIdentifierString(self, token):
 367     """Returns the whole identifier string for the given token.
 368
 369     Checks the tokens after the current one to see if the token is one in a
 370     sequence of tokens which are actually just one identifier (i.e. a line was
 371     wrapped in the middle of an identifier).
 372
 373     Args:
 374       token: The token to check.
 375
 376     Returns:
 377       The whole identifier string or None if this token is not the first token
 378       in a multi-token identifier.
 379     """
 380     result = ''
 381
 382     # Search backward to determine if this token is the first token of the
 383     # identifier. If it is not the first token, return None to signal that this
 384     # token should be ignored.
 385     prev_token = token.previous
 386     while prev_token:
 387       if (prev_token.IsType(TokenType.IDENTIFIER) or
 388           prev_token.IsType(TokenType.NORMAL) and prev_token.string == '.'):
 389         return None
 390       elif (not prev_token.IsType(TokenType.WHITESPACE) and
 391             not prev_token.IsAnyType(TokenType.COMMENT_TYPES)):
 392         break
 393       prev_token = prev_token.previous
 394
 395     # Search forward to find other parts of this identifier separated by white
 396     # space.
 397     next_token = token
 398     while next_token:
 399       if (next_token.IsType(TokenType.IDENTIFIER) or
 400           next_token.IsType(TokenType.NORMAL) and next_token.string == '.'):
 401         result += next_token.string
 402       elif (not next_token.IsType(TokenType.WHITESPACE) and
 403             not next_token.IsAnyType(TokenType.COMMENT_TYPES)):
 404         break
 405       next_token = next_token.next
 406
 407     return result
 408
 409   def _AddCreatedNamespace(self, state_tracker, identifier, namespace=None):
 410     """Adds the namespace of an identifier to the list of created namespaces.
 411
 412     If the identifier is annotated with a 'missingProvide' suppression, it is
 413     not added.
 414
 415     Args:
 416       state_tracker: The JavaScriptStateTracker instance.
 417       identifier: The identifier to add.
 418       namespace: The namespace of the identifier or None if the identifier is
 419           also the namespace.
 420     """
 421     if not namespace:
 422       namespace = identifier
 423
 424     jsdoc = state_tracker.GetDocComment()
 425     if jsdoc and 'missingProvide' in jsdoc.suppressions:
 426       return
 427
 428     self._created_namespaces.append([namespace, identifier])
 429
 430   def _AddUsedNamespace(self, state_tracker, identifier):
 431     """Adds the namespace of an identifier to the list of used namespaces.
 432
 433     If the identifier is annotated with a 'missingRequire' suppression, it is
 434     not added.
 435
 436     Args:
 437       state_tracker: The JavaScriptStateTracker instance.
 438       identifier: An identifier which has been used.
 439     """
 440     jsdoc = state_tracker.GetDocComment()
 441     if jsdoc and 'missingRequire' in jsdoc.suppressions:
 442       return
 443
 444     namespace = self.GetClosurizedNamespace(identifier)
 445     if namespace:
 446       self._used_namespaces.append([namespace, identifier])
 447
 448   def GetClosurizedNamespace(self, identifier):
 449     """Given an identifier, returns the namespace that identifier is from.
 450
 451     Args:
 452       identifier: The identifier to extract a namespace from.
 453
 454     Returns:
 455       The namespace the given identifier resides in, or None if one could not
 456       be found.
 457     """
 458     if identifier.startswith('goog.global'):
 459       # Ignore goog.global, since it is, by definition, global.
 460       return None
 461
 462     parts = identifier.split('.')
 463     for namespace in self._closurized_namespaces:
 464       if not identifier.startswith(namespace + '.'):
 465         continue
 466
 467       last_part = parts[-1]
 468       if not last_part:
 469         # TODO(robbyw): Handle this: it's a multi-line identifier.
 470         return None
 471
 472       # The namespace for a class is the shortest prefix ending in a class
 473       # name, which starts with a capital letter but is not a capitalized word.
 474       #
 475       # We ultimately do not want to allow requiring or providing of inner
 476       # classes/enums.  Instead, a file should provide only the top-level class
 477       # and users should require only that.
 478       namespace = []
 479       for part in parts:
 480         if part == 'prototype' or part.isupper():
 481           return '.'.join(namespace)
 482         namespace.append(part)
 483         if part[0].isupper():
 484           return '.'.join(namespace)
 485
 486       # At this point, we know there's no class or enum, so the namespace is
 487       # just the identifier with the last part removed. With the exception of
 488       # apply, inherits, and call, which should also be stripped.
 489       if parts[-1] in ('apply', 'inherits', 'call'):
 490         parts.pop()
 491       parts.pop()
 492
 493       # If the last part ends with an underscore, it is a private variable,
 494       # method, or enum. The namespace is whatever is before it.
 495       if parts and parts[-1].endswith('_'):
 496         parts.pop()
 497
 498       return '.'.join(parts)
 499
 500     return None