2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 """Checks third-party licenses for the purposes of the Android WebView build.
8 The Android tree includes a snapshot of Chromium in order to power the system
9 WebView. This tool checks that all code uses open-source licenses compatible
10 with Android, and that we meet the requirements of those licenses. It can also
11 be used to generate an Android NOTICE file for the third-party code.
13 It makes use of src/tools/licenses.py and the README.chromium files on which
14 it depends. It also makes use of a data file, third_party_files_whitelist.txt,
15 which whitelists individual files which contain third-party code but which
16 aren't in a third-party directory with a README.chromium file.
21 import multiprocessing
29 REPOSITORY_ROOT = os.path.abspath(os.path.join(
30 os.path.dirname(__file__), '..', '..'))
32 # Import third_party/PRESUBMIT.py via imp to avoid importing a random
33 # PRESUBMIT.py from $PATH, also make sure we don't generate a .pyc file.
34 sys.dont_write_bytecode = True
36 imp.load_source('PRESUBMIT', \
37 os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))
39 sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
42 import copyright_scanner
45 class InputApi(object):
47 self.os_path = os.path
48 self.os_walk = os.walk
50 self.ReadFile = _ReadFile
51 self.change = InputApiChange()
53 class InputApiChange(object):
55 self.RepositoryRoot = lambda: REPOSITORY_ROOT
58 def GetIncompatibleDirectories():
59 """Gets a list of third-party directories which use licenses incompatible
60 with Android. This is used by the snapshot tool.
62 A list of directories.
66 for directory in _FindThirdPartyDirs():
67 if directory in known_issues.KNOWN_ISSUES:
68 result.append(directory)
71 metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
72 require_license_file=False,
73 optional_keys=['License Android Compatible'])
74 except licenses.LicenseError as e:
75 print 'Got LicenseError while scanning ' + directory
77 if metadata.get('License Android Compatible', 'no').upper() == 'YES':
79 license = re.split(' [Ll]icenses?$', metadata['License'])[0]
80 if not third_party.LicenseIsCompatibleWithAndroid(InputApi(), license):
81 result.append(directory)
def GetUnknownIncompatibleDirectories():
  """Lists incompatibly licensed directories missing from known_issues.py.

  Starts from the result of GetIncompatibleDirectories() and removes every
  directory named by known_issues.KNOWN_INCOMPATIBLE. This is used by the
  AOSP bot.

  Returns:
    A frozenset of directories that use licenses incompatible with Android
    and are not covered by an entry in known_issues.KNOWN_INCOMPATIBLE.
  """
  # Scan first so that any output produced by the scan precedes the warnings
  # printed below.
  incompatible_directories = frozenset(GetIncompatibleDirectories())

  known_dirs = set()
  for base_path, patterns in known_issues.KNOWN_INCOMPATIBLE.iteritems():
    for pattern in patterns:
      if glob.has_magic(pattern):
        # A glob entry stands for its containing directory.
        parent_dir = os.path.dirname(pattern)
        if glob.has_magic(parent_dir):
          print ('Exclude path %s contains an unexpected glob expression,' \
                 ' skipping.' % pattern)
        pattern = parent_dir
      known_dirs.add(os.path.normpath(os.path.join(base_path, pattern)))

  return incompatible_directories.difference(known_dirs)
class ScanResult(object):
  """Status codes returned by the scanning helpers in this file.

  The values are consecutive integers starting at zero.
  """
  Ok = 0
  Warnings = 1
  Errors = 2
# Kept at module level (not nested or bound to a class) because it is handed
# to multiprocessing as the worker function.
def _FindCopyrightViolations(files_to_scan_as_string):
  """Runs the copyright scanner over one batch of files.

  Args:
    files_to_scan_as_string: The batch of files to scan; forwarded unchanged
      to copyright_scanner.FindCopyrightViolations.
  Returns:
    Whatever copyright_scanner.FindCopyrightViolations returns for the batch.
  """
  scan = copyright_scanner.FindCopyrightViolations
  return scan(InputApi(), REPOSITORY_ROOT, files_to_scan_as_string)
114 def _ShardList(l, shard_len):
115 return [l[i:i + shard_len] for i in range(0, len(l), shard_len)]
117 def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
118 """Checks that all files which are not in a listed third-party directory,
119 and which do not use the standard Chromium license, are whitelisted.
121 excluded_dirs_list: The list of directories to exclude from scanning.
122 whitelisted_files: The whitelist of files.
124 ScanResult.Ok if all files with non-standard license headers are whitelisted
125 and the whitelist contains no stale entries;
126 ScanResult.Warnings if there are stale entries;
127 ScanResult.Errors if new non-whitelisted entries found.
129 input_api = InputApi()
130 files_to_scan = copyright_scanner.FindFiles(
131 input_api, REPOSITORY_ROOT, ['.'], excluded_dirs_list)
132 sharded_files_to_scan = _ShardList(files_to_scan, 2000)
133 pool = multiprocessing.Pool()
134 offending_files_chunks = pool.map_async(
135 _FindCopyrightViolations, sharded_files_to_scan).get(999999)
138 # Flatten out the result
140 [item for sublist in offending_files_chunks for item in sublist]
142 (unknown, missing, stale) = copyright_scanner.AnalyzeScanResults(
143 input_api, whitelisted_files, offending_files)
146 print 'The following files contain a third-party license but are not in ' \
147 'a listed third-party directory and are not whitelisted. You must ' \
148 'add the following files to the whitelist.\n%s' % \
149 '\n'.join(sorted(unknown))
151 print 'The following files are whitelisted, but do not exist.\n%s' % \
152 '\n'.join(sorted(missing))
154 print 'The following files are whitelisted unnecessarily. You must ' \
155 'remove the following files from the whitelist.\n%s' % \
156 '\n'.join(sorted(stale))
159 return ScanResult.Errors
160 elif stale or missing:
161 return ScanResult.Warnings
166 def _ReadFile(full_path, mode='rU'):
167 """Reads a file from disk. This emulates presubmit InputApi.ReadFile func.
169 full_path: The path of the file to read.
171 The contents of the file as a string.
174 with open(full_path, mode) as f:
def _ReadLocalFile(path, mode='rb'):
  """Reads a file addressed relative to the repository root.

  Args:
    path: The path of the file to read, relative to REPOSITORY_ROOT.
    mode: The open mode, passed through to _ReadFile.
  Returns:
    The contents of the file, as returned by _ReadFile.
  """
  absolute_path = os.path.join(REPOSITORY_ROOT, path)
  return _ReadFile(absolute_path, mode)
189 def _FindThirdPartyDirs():
190 """Gets the list of third-party directories.
192 The list of third-party directories.
195 # Please don't add here paths that have problems with license files,
196 # as they will end up included in Android WebView snapshot.
197 # Instead, add them into known_issues.py.
199 # Temporary until we figure out how not to check out quickoffice on the
200 # Android license check bot. Tracked in crbug.com/350472.
201 os.path.join('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
202 # Placeholder directory, no third-party code.
203 os.path.join('third_party', 'adobe'),
204 # Apache 2.0 license. See
205 # https://code.google.com/p/chromium/issues/detail?id=140478.
206 os.path.join('third_party', 'bidichecker'),
207 # Isn't checked out on clients
208 os.path.join('third_party', 'gles2_conform'),
209 # The llvm-build doesn't exist for non-clang builder
210 os.path.join('third_party', 'llvm-build'),
211 # Binaries don't apply to Android
212 os.path.join('third_party', 'widevine'),
213 # third_party directories in this tree aren't actually third party, but
214 # provide a way to shadow experimental buildfiles into those directories.
215 os.path.join('build', 'secondary'),
216 # Not shipped, Chromium code
217 os.path.join('tools', 'swarming_client'),
219 third_party_dirs = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
220 return licenses.FilterDirsWithFiles(third_party_dirs, REPOSITORY_ROOT)
224 """Checks that license meta-data is present for all third-party code and
225 that all non third-party code doesn't contain external copyrighted code.
227 ScanResult.Ok if everything is in order;
228 ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
230 ScanResult.Errors otherwise.
233 third_party_dirs = _FindThirdPartyDirs()
235 # First, check designated third-party directories using src/tools/licenses.py.
236 all_licenses_valid = True
237 for path in sorted(third_party_dirs):
239 licenses.ParseDir(path, REPOSITORY_ROOT)
240 except licenses.LicenseError, e:
241 if not (path in known_issues.KNOWN_ISSUES):
242 print 'Got LicenseError "%s" while scanning %s' % (e, path)
243 all_licenses_valid = False
245 # Second, check for non-standard license text.
246 whitelisted_files = copyright_scanner.LoadWhitelistedFilesList(InputApi())
247 licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)
249 return licenses_check if all_licenses_valid else ScanResult.Errors
def GenerateNoticeFile():
  """Builds the text of an Android NOTICE file for the third-party code.

  The notice is Chromium's own LICENSE followed by the license file of every
  third-party directory that declares one and is not marked as not shipped.
  This is used by the snapshot tool.

  Returns:
    The contents of the NOTICE file: the individual license texts joined by
    newlines.
  """
  third_party_dirs = _FindThirdPartyDirs()

  # Chromium's own LICENSE file always comes first.
  notice_parts = [_ReadLocalFile('LICENSE')]

  # Attribution is provided for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for third_party_dir in sorted(third_party_dirs):
    license_file = licenses.ParseDir(
        third_party_dir, REPOSITORY_ROOT,
        require_license_file=False)['License File']
    # Skip directories with no license file and those that are not shipped.
    if not license_file or license_file == licenses.NOT_SHIPPED:
      continue
    notice_parts.append(_ReadLocalFile(license_file))

  return '\n'.join(notice_parts)
276 def _ProcessIncompatibleResult(incompatible_directories):
277 if incompatible_directories:
278 print ("Incompatibly licensed directories found:\n" +
279 "\n".join(sorted(incompatible_directories)))
280 return ScanResult.Errors
class FormatterWithNewLines(optparse.IndentedHelpFormatter):
  """Help formatter that keeps the explicit newlines in the description.

  Each newline-separated paragraph of the description is wrapped on its own
  to the formatter's width, so line breaks written in the text are kept.
  """

  def format_description(self, description):
    """Wraps each paragraph of the description separately.

    Args:
      description: The description text, with paragraphs separated by '\\n'.
    Returns:
      The wrapped paragraphs joined by newlines, with a trailing newline.
    """
    wrapped = []
    for paragraph in description.split('\n'):
      wrapped.append(textwrap.fill(paragraph, self.width))
    return '\n'.join(wrapped) + '\n'
290 parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
291 usage='%prog [options]')
292 parser.description = (__doc__ +
294 ' scan Check licenses.\n'
295 ' notice Generate Android NOTICE file on stdout.\n'
296 ' incompatible_directories Scan for incompatibly'
297 ' licensed directories.\n'
298 ' all_incompatible_directories Scan for incompatibly'
299 ' licensed directories (even those in'
300 ' known_issues.py).\n'
301 ' display_copyrights Display autorship on the files'
302 ' using names provided via stdin.\n')
303 (_, args) = parser.parse_args()
306 return ScanResult.Errors
308 if args[0] == 'scan':
309 scan_result = _Scan()
310 if scan_result == ScanResult.Ok:
313 elif args[0] == 'notice':
314 print GenerateNoticeFile()
316 elif args[0] == 'incompatible_directories':
317 return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
318 elif args[0] == 'all_incompatible_directories':
319 return _ProcessIncompatibleResult(GetIncompatibleDirectories())
320 elif args[0] == 'display_copyrights':
321 files = sys.stdin.read().splitlines()
323 zip(files, copyright_scanner.FindCopyrights(InputApi(), '.', files)):
324 print f, '\t', ' / '.join(sorted(c))
327 return ScanResult.Errors
329 if __name__ == '__main__':