1 # Copyright 2012 The Chromium OS Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Library for validating ebuild license information, and generating credits.
7 Documentation on this script is also available here:
8 http://www.chromium.org/chromium-os/licensing
11 from __future__ import print_function
20 from chromite.cbuildbot import constants
21 from chromite.lib import cros_build_lib
22 from chromite.lib import osutils
23 from chromite.lib import portage_util
25 # We are imported by src/repohooks/pre-upload.py in a non chroot environment
26 # where yaml may not be there, so we don't error on that since it's not needed
35 # See http://crbug.com/207004 for discussion.
36 PER_PKG_LICENSE_DIR = 'var/db/pkg'
38 STOCK_LICENSE_DIRS = [
39 os.path.join(constants.SOURCE_ROOT,
40 'src/third_party/portage-stable/licenses'),
# These are licenses for custom software that we received and that is not
# part of upstream Gentoo.
45 CUSTOM_LICENSE_DIRS = [
46 os.path.join(constants.SOURCE_ROOT,
47 'src/third_party/chromiumos-overlay/licenses'),
50 COPYRIGHT_ATTRIBUTION_DIR = (
52 constants.SOURCE_ROOT,
53 'src/third_party/chromiumos-overlay/licenses/copyright-attribution'))
55 # Virtual packages don't need to have a license and often don't, so we skip them
56 # chromeos-base contains google platform packages that are covered by the
57 # general license at top of tree, so we skip those too.
58 SKIPPED_CATEGORIES = [
63 # Fix these packages by adding a real license in the code.
64 # You should not skip packages just because the license scraping doesn't
65 # work. Stick those special cases into PACKAGE_LICENSES.
66 # Packages should only be here because they are sub/split packages already
67 # covered by the license of the main package.
69 # These are Chrome-OS-specific packages, copyright BSD-Google
70 'sys-kernel/chromeos-kernel', # already manually credit Linux
74 # Some of our packages contain binary blobs for which we have special
75 # negotiated licenses, and no need to display anything publicly. Strongly
76 # consider using Google-TOS instead, if possible.
79 # If you have an early repo for which license terms have yet to be decided
80 # use this. It will cause licensing for the package to be mostly ignored.
81 # Official should error for any package with this license.
82 'TAINTED', # TODO(dgarrett): Error on official builds with this license.
85 LICENSE_NAMES_REGEX = [
88 r'^copyright[.]regex$', # llvm
91 r'^licensing.*$', # libatomic_ops
92 r'^ipa_font_license_agreement_v1[.]0[.]txt$', # ja-ipafonts
93 r'^PKG-INFO$', # copyright assignment for
94 # some python packages
95 # (netifaces, unittest2)
98 # These are _temporary_ license mappings for packages that do not have a valid
99 # shared/custom license, or LICENSE file we can use.
100 # Once this script runs earlier (during the package build process), it will
101 # block new source without a LICENSE file if the ebuild contains a license
102 # that requires copyright assignment (BSD and friends).
103 # At that point, new packages will get fixed to include LICENSE instead of
104 # adding workaround mappings like those below.
# The way to fix copyright attribution cases now is to create a custom file
# with the right license directly in COPYRIGHT_ATTRIBUTION_DIR.
108 # TODO: replace the naive license parsing code in this script with a hook
109 # into portage's license parsing. See http://crbug.com/348779
111 # Chrome (the browser) is complicated, it has a morphing license that is
112 # either BSD-Google, or BSD-Google,Google-TOS depending on how it was
113 # built. We bypass this problem for now by hardcoding the Google-TOS bit as
114 # per ChromeOS with non free bits
115 'chromeos-base/chromeos-chrome': ['BSD-Google', 'Google-TOS'],
117 # Currently the code cannot parse LGPL-3 || ( LGPL-2.1 MPL-1.1 )
118 'dev-python/pycairo': ['LGPL-3', 'LGPL-2.1'],
# If any license listed here is found in the ebuild, the code will look for
# license files inside the package source code in order to get copyright
# attribution from them.
124 COPYRIGHT_ATTRIBUTION_LICENSES = [
125 'BSD', # requires distribution of copyright notice
126 'BSD-2', # so does BSD-2 http://opensource.org/licenses/BSD-2-Clause
127 'BSD-3', # and BSD-3? http://opensource.org/licenses/BSD-3-Clause
129 'BSD-with-attribution',
131 'MIT-with-advertising',
# The following licenses are valid, but their stock text is not very helpful,
# so it's better to look in the source code for a more specific license if
# there is one. It is not an error if no better one is found.
138 # Note that you don't want to set just anything here since any license here
139 # will be included once in stock form and a second time in custom form if
140 # found (there is no good way to know that a license we found on disk is the
141 # better version of the stock version, so we show both).
142 LOOK_IN_SOURCE_LICENSES = [
143 'as-is', # The stock license is very vague, source always has more details.
144 'PSF-2', # The custom license in python is more complete than the template.
146 # As far as I know, we have no requirement to do copyright attribution for
147 # these licenses, but the license included in the code has slightly better
148 # information than the stock Gentoo one (including copyright attribution).
149 'BZIP2', # Single use license, do copyright attribution.
150 'OFL', # Almost single use license, do copyright attribution.
151 'OFL-1.1', # Almost single use license, do copyright attribution.
152 'UoI-NCSA', # Only used by NSCA, might as well show their custom copyright.
155 # This used to provide overrides. I can't find a valid reason to add any more
157 PACKAGE_HOMEPAGES = {
159 # 'x11-proto/glproto': ['http://www.x.org/'],
162 # These are tokens found in LICENSE= in an ebuild that aren't licenses we
163 # can actually read from disk.
164 # You should not use this to blacklist real licenses.
166 ')', # Ignore OR tokens from LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
171 # Find the directory of this script.
172 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
174 # The template files we depend on for generating HTML.
175 TMPL = os.path.join(SCRIPT_DIR, 'about_credits.tmpl')
176 ENTRY_TMPL = os.path.join(SCRIPT_DIR, 'about_credits_entry.tmpl')
177 SHARED_LICENSE_TMPL = os.path.join(
178 SCRIPT_DIR, 'about_credits_shared_license_entry.tmpl')
181 # This is called directly by src/repohooks/pre-upload.py
def GetLicenseTypesFromEbuild(ebuild_path):
  """Returns a list of license types from the ebuild file.

  The ebuild is sourced through a small bash wrapper that supplies minimal
  `has` and `inherit` implementations, and the LICENSE variable is read back.
  This function does not always return the correct list, but it is faster
  than using portageq because it does not have to access the chroot.  It is
  intended to be used for tasks such as presubmission checks.

  Args:
    ebuild_path: ebuild to read.

  Returns:
    list of licenses read from ebuild.

  Raises:
    ValueError: the ebuild has no LICENSE, or LICENSE contains ',' or ';'.
  """
  # NOTE(review): the body of this shell snippet was partly reconstructed
  # (the `inherit` wrapper lines and the final `source`); confirm against
  # the canonical file before landing.
  ebuild_env_tmpl = """
has() { [[ " ${*:2} " == *" $1 "* ]]; }
inherit() {
  local overlay_list="%(overlay_list)s"
  local eclass overlay f
  for eclass; do
    has ${eclass} ${_INHERITED_} && continue
    _INHERITED_+=" ${eclass}"
    for overlay in %(overlay_list)s; do
      f="${overlay}/eclass/${eclass}.eclass"
      if [[ -e ${f} ]]; then
        source "${f}"
        break
      fi
    done
  done
}
source %(ebuild)s"""

  # TODO: the overlay_list hard-coded here should be changed to look
  # at the current overlay, and then the master overlays. E.g. for an
  # ebuild file in overlay-parrot, we will look at parrot overlay
  # first, and then look at portage-stable and chromiumos, which are
  # listed as masters in overlay-parrot/metadata/layout.conf.
  tmpl_env = {
      'ebuild': ebuild_path,
      'overlay_list': '%s %s' % (
          os.path.join(constants.SOURCE_ROOT,
                       'src/third_party/chromiumos-overlay'),
          os.path.join(constants.SOURCE_ROOT,
                       'src/third_party/portage-stable'))
  }

  # The temp file is only used for its name: the script is written and read
  # back by path, never through the handle, so no buffering option is needed
  # (the former `bufsize=0` keyword only exists on Python 2).
  with tempfile.NamedTemporaryFile() as f:
    osutils.WriteFile(f.name, ebuild_env_tmpl % tmpl_env)
    env = osutils.SourceEnvironment(
        f.name, whitelist=['LICENSE'], ifs=' ', multiline=True)

  if not env.get('LICENSE'):
    raise ValueError('No LICENSE found in the ebuild.')
  if re.search(r'[,;]', env['LICENSE']):
    raise ValueError(
        'LICENSE field in the ebuild should be whitespace-limited.')

  return env['LICENSE'].split()
class PackageLicenseError(Exception):
  """Thrown if something fails while getting license information for a package.

  This will cause the processing to error in the end.
  """
252 class PackageInfo(object):
253 """Package specific information, mostly about licenses."""
def __init__(self, board, fullnamerev):
  """Package info initializer.

  Args:
    board: The board this package was built for.
    fullnamerev: package name of the form 'x11-base/X.Org-1.9.3-r23'

  Raises:
    AssertionError: fullnamerev could not be split into
      category/package/version.
  """
  self.board = board  # This field may be None, based on entry path.

  #
  # Populate these fields from fullnamerev:
  #   category, name, version, revision
  #
  try:
    cpv = portage_util.SplitCPV(fullnamerev)
  except TypeError:
    cpv = None

  # A bad package can either raise a TypeError exception or return None.
  if not cpv:
    raise AssertionError(
        'portage couldn\'t find %s, missing version number?' % fullnamerev)

  #
  # These define the package uniquely.
  #
  self.category = cpv.category
  self.name = cpv.package
  self.version = cpv.version_no_rev
  self.revision = cpv.rev

  # Keep only the numeric part of the revision ('r23' -> '23').
  if self.revision is not None:
    self.revision = str(self.revision).lstrip('r')

  #
  # These fields hold license information used to generate the credits page.
  #

  # This contains licenses names for this package.
  self.license_names = set()

  # Full Text of discovered license information.
  self.license_text_scanned = []

  self.homepages = []

  #
  # These fields show the results of processing.
  #

  # After reading basic package information, we can mark the package as
  # one to skip in licensing.
  self.skip = False

  # If we failed to get licensing for this package, mark it as such so that
  # it can be flagged when the full license file is being generated.
  self.licensing_failed = False

  # Intelligently populate initial skip information.
  self.LookForSkip()
@property
def fullnamerev(self):
  """Category/name-version[-rN], e.g. dev-libs/libnl-3.2.24-r12.

  The '-rN' suffix is only appended when a revision is set.
  """
  s = '%s-%s' % (self.fullname, self.version)
  if self.revision:
    s += '-r%s' % self.revision
  return s

@property
def fullname(self):
  """Category/name without any version, e.g. dev-libs/libnl."""
  return '%s/%s' % (self.category, self.name)
@property
def license_dump_path(self):
  """e.g. /build/x86-alex/var/db/pkg/sys-apps/dtc-1.4.0/license.yaml.

  The path is built from the board sysroot, PER_PKG_LICENSE_DIR and
  fullnamerev.  Only valid for packages that have already been emerged.
  """
  sysroot = cros_build_lib.GetSysroot(self.board)
  return os.path.join(sysroot, PER_PKG_LICENSE_DIR, self.fullnamerev,
                      'license.yaml')
def _RunEbuildPhases(self, ebuild_path, phases):
  """Run a list of ebuild phases on an ebuild.

  Args:
    ebuild_path: exact path of the ebuild file.
    phases: sequence of phases like ['clean', 'fetch'] or ['unpack'].

  Returns:
    The result object of cros_build_lib.RunCommand; callers read the captured
    stdout from its `output` attribute.
  """
  # list() lets callers pass a tuple as well as a list.
  cmd = ['ebuild-%s' % self.board, ebuild_path] + list(phases)
  return cros_build_lib.RunCommand(cmd, print_cmd=debug, redirect_stdout=True)
def _GetOverrideLicense(self):
  """Look in COPYRIGHT_ATTRIBUTION_DIR for license with copyright attribution.

  For dev-util/bsdiff-4.3-r5, the code will look for
    dev-util/bsdiff-4.3-r5
    dev-util/bsdiff-4.3
    dev-util/bsdiff
  in that order, and use the first file that exists.

  It is ok to have more than one bsdiff license file, and an empty file acts
  as a rubout (i.e. an empty dev-util/bsdiff-4.4 will shadow dev-util/bsdiff
  and tell the licensing code to look in the package source for a license
  instead of using dev-util/bsdiff as an override).

  Returns:
    None (no license found) or a multiline license string.
  """
  # NOTE(review): a header line is always prepended to the file contents, so
  # an empty override file still yields a non-empty string here; confirm the
  # "rubout" behaviour described above is really honoured.
  license_read = None
  # dev-util/bsdiff-4.3-r5 -> bsdiff-4.3-r5
  filename = os.path.basename(self.fullnamerev)
  license_path = os.path.join(COPYRIGHT_ATTRIBUTION_DIR,
                              os.path.dirname(self.fullnamerev))
  pv = portage_util.SplitPV(filename)
  pv_no_rev = '%s-%s' % (pv.package, pv.version_no_rev)
  # Most specific name first.
  for candidate in (pv.pv, pv_no_rev, pv.package):
    file_path = os.path.join(license_path, candidate)
    logging.debug('Looking for override copyright attribution license in %s',
                  file_path)
    if os.path.exists(file_path):
      # Turn
      # /../merlin/trunk/src/third_party/chromiumos-overlay/../dev-util/bsdiff
      # into
      # chromiumos-overlay/../dev-util/bsdiff
      short_dir_path = os.path.join(*file_path.rsplit(os.path.sep, 5)[1:])
      license_read = 'Copyright Attribution License %s:\n\n' % short_dir_path
      license_read += ReadUnknownEncodedFile(
          file_path, 'read copyright attribution license')
      break

  return license_read
def _ExtractLicenses(self, src_dir, need_copyright_attribution):
  """Scrounge for text licenses in the source of package we'll unpack.

  This is only called if we couldn't get usable licenses from the ebuild,
  or one of them is BSD/MIT like which forces us to look for a file with
  copyright attribution in the source code itself.

  First, we have a shortcut where we scan COPYRIGHT_ATTRIBUTION_DIR to see if
  we find a license for this package. If so, we use that.
  Typically it'll be used if the unpacked source does not have the license
  that we're required to display for copyright attribution (in some cases it's
  plain absent, in other cases, it could be in a filename we don't look for).

  Otherwise, we scan the unpacked source code for what looks like license
  files as defined in LICENSE_NAMES_REGEX.  Every match is appended to
  self.license_text_scanned.

  Args:
    src_dir: directory holding the unpacked source.  If false-y, the package
      is fetched and unpacked with ebuild-$board first.
    need_copyright_attribution: if true, finding no license file is an error;
      otherwise it is only logged.

  Raises:
    AssertionError: on runtime errors
    PackageLicenseError: couldn't find copyright attribution file.
  """
  license_override = self._GetOverrideLicense()
  if license_override:
    self.license_text_scanned = [license_override]
    return

  if not src_dir:
    ebuild_path = self._FindEbuildPath()
    self._RunEbuildPhases(ebuild_path, ['clean', 'fetch'])
    raw_output = self._RunEbuildPhases(ebuild_path, ['unpack'])
    # Output is spammy, it looks like this:
    #  * gc-7.2d.tar.gz RMD160 SHA1 SHA256 size ;-) ...                 [ ok ]
    #  * checking gc-7.2d.tar.gz ;-) ...                                [ ok ]
    #  * Running stacked hooks for pre_pkg_setup
    #  *    sysroot_build_bin_dir ...
    #  * Running stacked hooks for pre_src_unpack
    #  *    python_multilib_setup ...
    # >>> Unpacking source...
    # >>> Unpacking gc-7.2d.tar.gz to /build/x86-alex/tmp/po/[...]ps-7.2d/work
    # >>> Source unpacked in /build/x86-alex/tmp/portage/[...]ops-7.2d/work
    # So we only keep the last 2 lines, the others we don't care about.
    for line in raw_output.output.splitlines():
      if line[0:3] == '>>>' and line != '>>> Unpacking source...':
        logging.info(line)

    args = ['portageq-%s' % self.board, 'envvar', 'PORTAGE_TMPDIR']
    result = cros_build_lib.RunCommand(args, print_cmd=debug,
                                       redirect_stdout=True)
    tmpdir = result.output.splitlines()[0]
    # tmpdir gets something like /build/daisy/tmp/
    src_dir = os.path.join(tmpdir, 'portage', self.fullnamerev, 'work')

    if not os.path.exists(src_dir):
      raise AssertionError(
          'Unpack of %s didn\'t create %s. Version mismatch' %
          (self.fullnamerev, src_dir))

  # You may wonder how deep should we go?
  # In case of packages with sub-packages, it could be deep.
  # Let's just be safe and get everything we can find.
  # In the case of libatomic_ops, it's actually required to look deep
  # to find the MIT license:
  # dev-libs/libatomic_ops-7.2d/work/gc-7.2/libatomic_ops/doc/LICENSING.txt
  args = ['find', src_dir, '-type', 'f']
  result = cros_build_lib.RunCommand(args, print_cmd=debug,
                                     redirect_stdout=True).output.splitlines()
  # Truncate results to look like this: swig-2.0.4/COPYRIGHT
  files = [x[len(src_dir):].lstrip('/') for x in result]
  license_files = []
  for name in files:
    # When we scan a source tree managed by git, this can contain license
    # files that are not part of the source. Exclude those.
    # (e.g. .git/refs/heads/licensing)
    if '.git/' in name:
      continue
    basename = os.path.basename(name)
    # Looking for license.* brings up things like license.gpl, and we
    # never want a GPL license when looking for copyright attribution,
    # so we skip them here. We also skip regexes that can return
    # license.py (seen in some code).
    if re.search(r'.*GPL.*', basename) or re.search(r'\.py$', basename):
      continue
    for regex in LICENSE_NAMES_REGEX:
      if re.search(regex, basename, re.IGNORECASE):
        license_files.append(name)
        # One match is enough; don't add the same file twice.
        break

  if not license_files:
    if need_copyright_attribution:
      # NOTE(review): the 'LICENSE="BSD" / to / LICENSE="BSD-Google"' lines
      # of this message were reconstructed; confirm the exact wording.
      logging.error("""
%s: unable to find usable license.
Typically this will happen because the ebuild says it's MIT or BSD, but there
was no license file that this script could find to include along with a
copyright attribution (required for BSD/MIT).

If this is Google source, please change
LICENSE="BSD"
to
LICENSE="BSD-Google"

If not, go investigate the unpacked source in %s,
and find which license to assign. Once you found it, you should copy that
license to a file under %s
(or you can modify LICENSE_NAMES_REGEX to pickup a license file that isn't
being scraped currently).""",
                    self.fullnamerev, src_dir, COPYRIGHT_ATTRIBUTION_DIR)
      raise PackageLicenseError()
    else:
      # We can get called for a license like as-is where it's preferable
      # to find a better one in the source, but not fatal if we didn't.
      logging.info('Was not able to find a better license for %s '
                   'in %s to replace the more generic one from ebuild',
                   self.fullnamerev, src_dir)

  # Examples of multiple license matches:
  # dev-lang/swig-2.0.4-r1: swig-2.0.4/COPYRIGHT swig-2.0.4/LICENSE
  # dev-libs/glib-2.32.4-r1: glib-2.32.4/COPYING pkg-config-0.26/COPYING
  # dev-libs/libnl-3.2.14: libnl-doc-3.2.14/COPYING libnl-3.2.14/COPYING
  # dev-libs/libpcre-8.30-r2: pcre-8.30/LICENCE pcre-8.30/COPYING
  # dev-libs/libusb-0.1.12-r6: libusb-0.1.12/COPYING libusb-0.1.12/LICENSE
  # dev-libs/pyzy-0.1.0-r1: db/COPYING pyzy-0.1.0/COPYING
  # net-misc/strongswan-5.0.2-r4: strongswan-5.0.2/COPYING
  #                               strongswan-5.0.2/LICENSE
  # sys-process/procps-3.2.8_p11: debian/copyright procps-3.2.8/COPYING
  logging.info('License(s) for %s: %s', self.fullnamerev,
               ' '.join(license_files))
  for license_file in sorted(license_files):
    # Joy and pink ponies. Some license_files are encoded as latin1 while
    # others are utf-8 and of course you can't know but only guess.
    license_path = os.path.join(src_dir, license_file)
    license_txt = ReadUnknownEncodedFile(license_path, 'Adding License')

    self.license_text_scanned += [
        'Scanned Source License %s:\n\n%s' % (license_file, license_txt)]

  # We used to clean up here, but there have been many instances where
  # looking at unpacked source to see where the licenses were, was useful
  # so let's disable this for now
  # self._RunEbuildPhases(['clean'])
def LookForSkip(self):
  """Look for a reason to skip over this package.

  Sets self.skip to True if the package's category is in SKIPPED_CATEGORIES
  or its fullname is in SKIPPED_PACKAGES.  self.skip is never reset to False
  here.

  Returns:
    The resulting value of self.skip.
  """
  if self.category in SKIPPED_CATEGORIES:
    logging.info('%s in SKIPPED_CATEGORIES, skip package', self.fullname)
    self.skip = True

  if self.fullname in SKIPPED_PACKAGES:
    logging.info('%s in SKIPPED_PACKAGES, skip package', self.fullname)
    self.skip = True

  # TODO(dgarrett): There are additional reasons that should be handled here.

  return self.skip
def _FindEbuildPath(self):
  """Discover the path to a package's associated ebuild.

  This method is not valid during the emerge hook process.

  Returns:
    full path file name of the ebuild file for this package.

  Raises:
    AssertionError if it can't be discovered for some reason.
  """
  args = ['equery-%s' % self.board, '-q', '-C', 'which', self.fullnamerev]
  try:
    path = cros_build_lib.RunCommand(args, print_cmd=True,
                                     redirect_stdout=True).output.strip()
  except cros_build_lib.RunCommandError:
    path = None

  # Path can be false because of an exception, or a command result.
  if not path:
    raise AssertionError('_FindEbuildPath for %s failed.\n'
                         'Is your tree clean? Try a rebuild?' %
                         self.fullnamerev)

  logging.debug('%s -> %s', ' '.join(args), path)

  if not os.access(path, os.F_OK):
    # Format the message here: exceptions do not interpolate extra arguments
    # the way logging calls do.
    raise AssertionError('Can\'t access %s' % path)

  return path
def GetLicenses(self, build_info_dir, src_dir):
  """Populate the license related fields.

  Fields populated:
    license_names, license_text_scanned, homepages,
    skip, licensing_failed

  Some packages have static license mappings applied to them that get
  retrieved from the ebuild.

  For others, we figure out whether the package source should be scanned to
  add licenses found there.

  Args:
    build_info_dir: Path to the build_info for the ebuild. This can be from
      the working directory during the emerge hook, or in the portage pkg db.
    src_dir: Directory to the expanded source code for this package. If None,
      the source will be expanded, if needed (slow).

  Raises:
    AssertionError: on runtime errors
    PackageLicenseError: couldn't find license in ebuild and source.
  """
  # If the total size installed is zero, we installed no content to license.
  if _BuildInfo(build_info_dir, 'SIZE').strip() == '0':
    self.skip = True
    return

  self.homepages = _BuildInfo(build_info_dir, 'HOMEPAGE').split()
  ebuild_license_names = _BuildInfo(build_info_dir, 'LICENSE').split()

  # If this ebuild only uses skipped licenses, skip it.
  if (ebuild_license_names and
      all(name in SKIPPED_LICENSES for name in ebuild_license_names)):
    self.skip = True

  if self.skip:
    return

  if self.fullname in PACKAGE_HOMEPAGES:
    self.homepages = PACKAGE_HOMEPAGES[self.fullname]

  # Packages with missing licenses or licenses that need mapping (like
  # BSD/MIT) are hardcoded here:
  if self.fullname in PACKAGE_LICENSES:
    ebuild_license_names = PACKAGE_LICENSES[self.fullname]
    logging.info('Static license mapping for %s: %s', self.fullnamerev,
                 ','.join(ebuild_license_names))
  else:
    logging.info('Read licenses for %s: %s', self.fullnamerev,
                 ','.join(ebuild_license_names))

  # Lots of packages in chromeos-base have their license set to BSD instead
  # of BSD-Google.
  # NOTE(review): the level of the two "Fixed ..." log calls below was
  # reconstructed as error; confirm.
  new_license_names = []
  for license_name in ebuild_license_names:
    # TODO: temp workaround for http://crbug.com/348750 , remove when the bug
    # is fixed.
    if (license_name == 'BSD' and
        self.fullnamerev.startswith('chromeos-base/')):
      license_name = 'BSD-Google'
      logging.error(
          'Fixed BSD->BSD-Google for %s because it\'s in chromeos-base. '
          'Please fix the LICENSE field in the ebuild', self.fullnamerev)
    # TODO: temp workaround for http://crbug.com/348749 , remove when the bug
    # is fixed.
    if license_name == 'Proprietary':
      license_name = 'Google-TOS'
      logging.error(
          'Fixed Proprietary -> Google-TOS for %s. '
          'Please fix the LICENSE field in the ebuild', self.fullnamerev)
    new_license_names.append(license_name)
  ebuild_license_names = new_license_names

  # The ebuild license field can look like:
  # LICENSE="GPL-3 LGPL-3 Apache-2.0" (this means AND, as in all 3)
  # for third_party/portage-stable/app-admin/rsyslog/rsyslog-5.8.11.ebuild
  # LICENSE="|| ( LGPL-2.1 MPL-1.1 )"
  # for third_party/portage-stable/x11-libs/cairo/cairo-1.8.8.ebuild
  #
  # The parser isn't very smart and only has basic support for the
  # || ( X Y ) OR logic to do the following:
  # In order to save time needlessly unpacking packages and looking for a
  # cleartext license (which is really a crapshoot), if we have a license
  # like BSD that requires looking for copyright attribution, but we can
  # choose another license like GPL, we do that.

  # self.skip is always False here (we returned above otherwise).
  if not ebuild_license_names:
    logging.error('%s: no license found in ebuild. FIXME!', self.fullnamerev)
    # In a bind, you could comment this out. I'm making the output fail to
    # get your attention since this error really should be fixed, but if you
    # comment out the next line, the script will try to find a license inside
    # the source.
    raise PackageLicenseError()

  # This is not invalid, but the parser can't deal with it, so if it ever
  # happens, error out to tell the programmer to do something.
  # dev-python/pycairo-1.10.0-r4: LGPL-3 || ( LGPL-2.1 MPL-1.1 )
  if '||' in ebuild_license_names[1:]:
    logging.error('%s: Can\'t parse || in the middle of a license: %s',
                  self.fullnamerev, ' '.join(ebuild_license_names))
    raise PackageLicenseError()

  # Real license names, with the OR-syntax tokens filtered out.
  usable_names = [x for x in ebuild_license_names
                  if x not in LICENCES_IGNORE]

  # Quick early pass so that the longer pass below can act on it: in an OR
  # case where at least one license can be used in stock form, we remember
  # that in order to be able to skip license attributions if any were in
  # the OR.
  or_licenses_and_one_is_no_attribution = (
      ebuild_license_names[0] == '||' and
      any(name not in COPYRIGHT_ATTRIBUTION_LICENSES
          for name in usable_names))

  need_copyright_attribution = False
  scan_source_for_licenses = False

  for license_name in usable_names:
    # Licenses like BSD or MIT can't be used as is because they do not contain
    # copyright info. They have to be replaced by the copyright file given in
    # the source code, or manually mapped by us in PACKAGE_LICENSES
    if license_name in COPYRIGHT_ATTRIBUTION_LICENSES:
      # To limit needless efforts, if a package is BSD or GPL, we ignore BSD
      # and use GPL to avoid scanning the package, but we can only do this if
      # or_licenses_and_one_is_no_attribution has been set above.
      # This ensures that if we have License: || (BSD3 BSD4), we will
      # look in the source.
      if or_licenses_and_one_is_no_attribution:
        logging.info('%s: ignore license %s because ebuild LICENSES had %s',
                     self.fullnamerev, license_name,
                     ' '.join(ebuild_license_names))
      else:
        logging.info('%s: can\'t use %s, will scan source code for copyright',
                     self.fullnamerev, license_name)
        need_copyright_attribution = True
        scan_source_for_licenses = True
    else:
      self.license_names.add(license_name)
      # We can't display just 2+ because the stock text only points at
      # another version, so also pull in the base license.
      if license_name == 'GPL-2+':
        self.license_names.add('GPL-2')
      if license_name == 'LGPL-2+':
        self.license_names.add('LGPL-2')

    if license_name in LOOK_IN_SOURCE_LICENSES:
      logging.info('%s: Got %s, will try to find better license in source...',
                   self.fullnamerev, license_name)
      scan_source_for_licenses = True

  if self.license_names:
    # Sorted so that the log line is stable from run to run.
    logging.info('%s: using stock|cust license(s) %s',
                 self.fullnamerev, ','.join(sorted(self.license_names)))

  # If the license(s) could not be found, or one requires copyright
  # attribution, dig in the source code for license files:
  # For instance:
  # Read licenses from ebuild for net-dialup/ppp-2.4.5-r3: BSD,GPL-2
  # We need get the substitution file for BSD and add it to GPL.
  if scan_source_for_licenses:
    self._ExtractLicenses(src_dir, need_copyright_attribution)

  # This shouldn't run, but leaving as sanity check.
  if not self.license_names and not self.license_text_scanned:
    raise AssertionError('Didn\'t find usable licenses for %s' %
                         self.fullnamerev)
def SaveLicenseDump(self, save_file):
  """Save PackageInfo contents to a YAML file.

  This is used to cache license results between the emerge hook phase and
  credits page generation.  The instance attributes are written as a list of
  (name, value) pairs.

  Args:
    save_file: File to save the yaml contents into.
  """
  logging.debug('Saving license to %s', save_file)
  # A concrete, sorted list: dict.items() is a view object on Python 3, and
  # sorting by attribute name keeps the dump stable between runs.  Attribute
  # names are unique, so the values are never compared.
  yaml_dump = sorted(self.__dict__.items())
  osutils.WriteFile(save_file, yaml.dump(yaml_dump), makedirs=True)
769 class Licensing(object):
770 """Do the actual work of extracting licensing info and outputting html."""
def __init__(self, board, package_fullnames, gen_licenses):
  """Licensing initializer.

  Args:
    board: board name, passed on to every PackageInfo created here.
    package_fullnames: iterable of package names that LoadPackageInfo turns
      into PackageInfo objects.
    gen_licenses: if true, licenses are (re)generated instead of only being
      read back from existing dumps.
  """
  self.board = board

  # List of stock and custom licenses referenced in ebuilds. Used to
  # print a report. Dict value says which packages use that license.
  self.licenses = {}

  # Licenses are supposed to be generated at package build time and be
  # ready for us, but in case they're not, they can be generated.
  self.gen_licenses = gen_licenses

  # This keeps track of whether we have an incomplete license file due to
  # package errors during parsing.
  # Any non empty list at the end shows the list of packages that caused
  # errors.
  self.incomplete_packages = []

  self.package_text = {}
  self.entry_template = None

  # We need to have a dict for the list of packages objects, index by package
  # fullnamerev, so that when we scan our licenses at the end, and find out
  # some shared licenses are only used by one package, we can access that
  # package object by name, and add the license directly in that object.
  self.packages = {}
  self._package_fullnames = package_fullnames
@property
def sorted_licenses(self):
  """License names from self.licenses, sorted case-insensitively."""
  return sorted(self.licenses.keys(), key=str.lower)
def _LoadLicenseDump(self, pkg):
  """Overwrite pkg's attributes with those stored in its license dump.

  The dump is read from pkg.license_dump_path and is expected to be the list
  of (attribute name, value) pairs written by PackageInfo.SaveLicenseDump.

  Args:
    pkg: PackageInfo object to update in place.
  """
  save_file = pkg.license_dump_path
  logging.debug('Getting license from %s for %s', save_file, pkg.name)
  # NOTE(review): yaml.load can construct arbitrary Python objects.  That is
  # acceptable only as long as these dumps are produced by our own build; do
  # not point this at untrusted files.
  yaml_dump = yaml.load(osutils.ReadFile(save_file))
  for key, value in yaml_dump:
    pkg.__dict__[key] = value
def LicensedPackages(self, license_name):
  """Return list of packages using a given license.

  Args:
    license_name: key to look up in self.licenses.

  Returns:
    The value stored in self.licenses for that license.

  Raises:
    KeyError: license_name is not a known license.
  """
  return self.licenses[license_name]
def LoadPackageInfo(self):
  """Populate basic package info for all packages from their ebuild.

  Creates one PackageInfo per name given to the constructor and stores it in
  self.packages, keyed by that name.
  """
  for fullnamerev in self._package_fullnames:
    self.packages[fullnamerev] = PackageInfo(self.board, fullnamerev)
def ProcessPackageLicenses(self):
  """Iterate through all packages provided and gather their licenses.

  GetLicenses will scrape licenses from the code and/or gather stock license
  names. We gather the list of stock and custom ones for later processing.

  Packages whose licensing failed are recorded in self.incomplete_packages.

  Do not call this after adding virtual packages with AddExtraPkg.
  """
  for package_name in self.packages:
    pkg = self.packages[package_name]
    if pkg.skip:
      if self.gen_licenses:
        logging.info('Package %s is in skip list', package_name)
      continue

    # Other skipped packages get dumped with incomplete info and the skip flag
    dump_missing = not os.path.exists(pkg.license_dump_path)
    if dump_missing and not self.gen_licenses:
      logging.warning('>>> License for %s is missing, creating now <<<',
                      package_name)
    if dump_missing or self.gen_licenses:
      try:
        build_info_path = os.path.join(
            cros_build_lib.GetSysroot(pkg.board),
            PER_PKG_LICENSE_DIR, pkg.fullnamerev)
        pkg.GetLicenses(build_info_path, None)
      except PackageLicenseError:
        pkg.licensing_failed = True

      # We dump packages where licensing failed too.
      pkg.SaveLicenseDump(pkg.license_dump_path)

  # To debug the code, we force the data to be re-read from the dumps
  # instead of reusing what we may have in memory.
  for package_name in self.packages:
    pkg = self.packages[package_name]
    if pkg.category == 'virtual':
      continue

    self._LoadLicenseDump(pkg)
    logging.debug('loaded dump for %s', pkg.fullnamerev)
    if pkg.skip:
      logging.info('Package %s is in skip list', pkg.fullnamerev)
    if pkg.licensing_failed:
      logging.info('Package %s failed licensing', pkg.fullnamerev)
      self.incomplete_packages += [pkg.fullnamerev]
def AddExtraPkg(self, fullnamerev, homepages, license_names):
  """Allow adding pre-created virtual packages.

  GetLicenses will not work on them, so add them after having run
  ProcessPackageLicenses.

  Args:
    fullnamerev: package name of the form x11-base/X.Org-1.9.3-r23
    homepages: list of url strings.
    license_names: list of license name strings.
  """
  pkg = PackageInfo(self.board, fullnamerev)
  pkg.homepages = homepages  # this is a list
  pkg.license_names = license_names  # this is also a list
  self.packages[fullnamerev] = pkg
882 # Called directly by src/repohooks/pre-upload.py
@staticmethod
def FindLicenseType(license_name):
  """Says if a license is stock Gentoo, custom, or doesn't exist.

  Args:
    license_name: license file name to look for in STOCK_LICENSE_DIRS, then
      in CUSTOM_LICENSE_DIRS.

  Returns:
    'Gentoo Package Stock' or 'Custom'.

  Raises:
    AssertionError: the license is in neither set of directories and is not
      one of SKIPPED_LICENSES.
  """
  # NOTE(review): the 'Custom' return values and the "to /tmp/portage/:"
  # line of the message below were reconstructed; confirm.
  for directory in STOCK_LICENSE_DIRS:
    if os.path.exists(os.path.join(directory, license_name)):
      return 'Gentoo Package Stock'

  for directory in CUSTOM_LICENSE_DIRS:
    if os.path.exists(os.path.join(directory, license_name)):
      return 'Custom'

  if license_name in SKIPPED_LICENSES:
    return 'Custom'

  raise AssertionError("""
license %s could not be found in %s
If the license in the ebuild is correct,
a) a stock license should be added to portage-stable/licenses :
running `cros_portage_upgrade` inside of the chroot should clone this repo
to /tmp/portage/:
https://chromium.googlesource.com/chromiumos/overlays/portage/+/gentoo
find the new licenses under licenses, and add them to portage-stable/licenses

b) if it's a non gentoo package with a custom license, you can copy that license
to third_party/chromiumos-overlay/licenses/

Try re-running the script with -p cat/package-ver --generate
after fixing the license.""" %
                       (license_name,
                        '\n'.join(STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS)))
@staticmethod
def ReadSharedLicense(license_name):
  """Return the text of a stock or custom license named in an ebuild.

  Args:
    license_name: file name of the license to read.

  Returns:
    Content of the first matching file, searching STOCK_LICENSE_DIRS before
    CUSTOM_LICENSE_DIRS.

  Raises:
    AssertionError: no directory contains a file of that name.
  """
  search_dirs = STOCK_LICENSE_DIRS + CUSTOM_LICENSE_DIRS
  for directory in search_dirs:
    candidate = os.path.join(directory, license_name)
    if os.path.exists(candidate):
      # First hit wins.
      return ReadUnknownEncodedFile(candidate, 'read license')

  raise AssertionError('license %s could not be found in %s'
                       % (license_name, '\n'.join(search_dirs)))
@staticmethod
def EvaluateTemplate(template, env):
  """Expand a template with vars like {{foo}} using a dict of expansions.

  Args:
    template: text containing {{key}} placeholders.
    env: dict mapping placeholder names to their replacement strings.

  Returns:
    |template| with every {{key}} that has an entry in |env| replaced by its
    value; placeholders without an entry are left untouched.
  """
  # TODO switch to stock python templates.
  # items() rather than iteritems(): same result here, and it exists on both
  # Python 2 and Python 3.
  for key, val in env.items():
    template = template.replace('{{%s}}' % key, val)
  return template
def _GeneratePackageLicenseText(self, pkg):
  """Build the HTML entry for one package and store it in self.package_text.

  The entry combines the license texts already attached to the package
  (pkg.license_text_scanned) with links to the shared licenses named in
  pkg.license_names.

  Args:
    pkg: PackageInfo object

  Raises:
    AssertionError: the package has neither license text nor shared licenses.
  """
  # Every scanned license text is followed by a separator line.
  separator = '%s\n' % ('-=' * 40)
  license_text = []
  for scanned in pkg.license_text_scanned:
    license_text.extend([scanned, separator])

  # sln: shared license name. FindLicenseType says whether it's a stock
  # gentoo or custom license.
  license_pointers = [
      "<li><a href='#%s'>%s License %s</a></li>" % (
          sln, self.FindLicenseType(sln), sln)
      for sln in pkg.license_names]

  # This should get caught earlier, but one extra check.
  if not (license_text or license_pointers):
    raise AssertionError('Ended up with no license_text for %s' %
                         pkg.fullnamerev)

  homepage = cgi.escape(pkg.homepages[0]) if pkg.homepages else ''
  env = {
      'name': '%s-%s' % (pkg.name, pkg.version),
      'url': homepage,
      'licenses_txt': cgi.escape('\n'.join(license_text)) or '',
      'licenses_ptr': '\n'.join(license_pointers) or '',
  }
  self.package_text[pkg] = self.EvaluateTemplate(self.entry_template, env)
def GenerateHTMLLicenseOutput(self, output_file,
                              output_template=TMPL,
                              entry_template=ENTRY_TMPL,
                              license_template=SHARED_LICENSE_TMPL):
  """Generate the combined html license file used in ChromeOS.

  Packages flagged as skipped or as having failed licensing are left out.
  A shared license that is used by exactly one package is rolled into that
  package's own entry instead of being listed in the shared section.

  Args:
    output_file: resulting HTML license output.
    output_template: template for the entire HTML file.
    entry_template: template for per package entries.
    license_template: template for shared license entries.
  """
  self.entry_template = ReadUnknownEncodedFile(entry_template)
  sorted_license_txt = []

  # Keep track of which licenses are used by which packages.
  for pkg in self.packages.values():
    if pkg.skip or pkg.licensing_failed:
      continue
    for sln in pkg.license_names:
      self.licenses.setdefault(sln, []).append(pkg.fullnamerev)

  # Find licenses only used once, and roll them in the package that uses them.
  # Entries are deleted from self.licenses inside the loop, so iterate over an
  # explicit snapshot of the keys. Relying on keys() returning a copy only
  # holds on Python 2; on a dict view the deletion raises RuntimeError.
  for sln in list(self.licenses):
    users = self.licenses[sln]
    if len(users) != 1:
      continue
    pkg_fullnamerev = users[0]
    logging.info('Collapsing shared license %s into single use license '
                 '(only used by %s)', sln, pkg_fullnamerev)
    license_type = self.FindLicenseType(sln)
    license_txt = self.ReadSharedLicense(sln)
    single_license = '%s License %s:\n\n%s' % (license_type, sln,
                                                license_txt)
    pkg = self.packages[pkg_fullnamerev]
    pkg.license_text_scanned.append(single_license)
    pkg.license_names.remove(sln)
    del self.licenses[sln]

  # Per-package entries, ordered by case-insensitive name, then version and
  # revision.
  for pkg in sorted(self.packages.values(),
                    key=lambda x: (x.name.lower(), x.version, x.revision)):
    if pkg.skip:
      logging.debug('Skipping package %s', pkg.fullnamerev)
      continue
    if pkg.licensing_failed:
      logging.debug('Package %s failed licensing, skipping', pkg.fullnamerev)
      continue
    self._GeneratePackageLicenseText(pkg)
    sorted_license_txt.append(self.package_text[pkg])

  # Now generate the bottom of the page that will contain all the shared
  # licenses and a list of who is pointing to them.
  license_template = ReadUnknownEncodedFile(license_template)

  licenses_txt = []
  for license_name in self.sorted_licenses:
    env = {
        'license_name': license_name,
        'license': cgi.escape(self.ReadSharedLicense(license_name)),
        'license_type': self.FindLicenseType(license_name),
        'license_packages': ' '.join(self.LicensedPackages(license_name)),
    }
    licenses_txt.append(self.EvaluateTemplate(license_template, env))

  file_template = ReadUnknownEncodedFile(output_template)
  env = {
      'entries': '\n'.join(sorted_license_txt),
      'licenses': '\n'.join(licenses_txt),
  }
  osutils.WriteFile(output_file,
                    self.EvaluateTemplate(file_template, env).encode('UTF-8'))
def ListInstalledPackages(board, all_packages=False):
  """Return a list of all packages installed for a particular board.

  Args:
    board: board name; selects the equery-<board> / emerge-<board> wrapper.
    all_packages: if True, list every package reported by
      `equery-<board> list '*'`. This is not what you want for a release
      license file, but it's a way to run licensing checks against all
      packages. If False, only list the packages used for a release build
      (as determined by the dependencies for virtual/target-os).

  Returns:
    List of strings: raw equery output lines when |all_packages| is set,
    otherwise the package names parsed out of the emerge --pretend output.

  Raises:
    AssertionError: emerge reports a package in a state other than 'R'.
  """
  if all_packages:
    # Ask equery for everything installed for the board, whether or not it
    # is part of the image we ship.
    args = ['equery-%s' % board, 'list', '*']
    return cros_build_lib.RunCommand(
        args, print_cmd=debug, redirect_stdout=True).output.splitlines()

  # The following returns all packages that were part of the build tree
  # (many get built or used during the build, but do not get shipped).
  # Note that it also contains packages that are in the build as
  # defined by build_packages but not part of the image we ship.
  args = ['emerge-%s' % board, '--with-bdeps=y', '--usepkgonly',
          '--emptytree', '--pretend', '--color=n', 'virtual/target-os']
  emerge = cros_build_lib.RunCommand(
      args, print_cmd=debug, redirect_stdout=True).output.splitlines()
  # Another option which we've decided not to use, is bdeps=n. This outputs
  # just the packages we ship, but not the packages that were used to build
  # them, including a package like flex which generates a .a that is included
  # and shipped in ChromeOS.
  # We've decided to credit build packages, even if we're not legally required
  # to (it's always nice to do), and that way we get corner case packages like
  # flex. This is why we use bdep=y and not bdep=n.

  # [binary   R    ] x11-libs/libva-1.1.1 to /build/x86-alex/
  pkg_rgx = re.compile(r'\[[^]]+R[^]]+\] (.+) to /build/.*')
  # If we match something else without the 'R' like
  # [binary     U  ] chromeos-base/pepper-flash-13.0.0.133-r1 [12.0.0.77-r1]
  # this is bad and we should die on this.
  pkg_rgx2 = re.compile(r'(\[[^]]+\] .+) to /build/.*')

  packages = []
  for line in emerge:
    match = pkg_rgx.search(line)
    if match:
      packages.append(match.group(1))
      continue
    match2 = pkg_rgx2.search(line)
    if match2:
      # Build a single message string; passing two positional arguments to
      # the exception makes it print as a tuple.
      raise AssertionError(
          'Package incorrectly installed, try eclean-%s\n%s' %
          (board, match2.group(1)))

  return packages
1114 def _HandleIllegalXMLChars(text):
1115 """Handles illegal XML Characters.
1117 XML 1.0 acceptable character range:
1118 Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | \
1121 This function finds all illegal characters in the text and filters
1122 out all whitelisted characters (e.g. ^L).
1125 text: text to examine.
1128 Filtered |text| and a list of non-whitelisted illegal characters found.
1130 whitelist_re = re.compile(u'[\x0c]')
1131 text = whitelist_re.sub('', text)
1132 # illegal_chars_re includes all illegal characters (whitelisted or
1133 # not), so we can expand the whitelist without modifying this line.
1134 illegal_chars_re = re.compile(
1135 u'[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]')
1136 return (text, illegal_chars_re.findall(text))
def ReadUnknownEncodedFile(file_path, logging_text=None):
  """Read a file that is either UTF-8 or latin1, trying UTF-8 first.

  Args:
    file_path: what to read.
    logging_text: optional prefix; when set, an info line is logged telling
      which encoding was used to read the file.

  Returns:
    Decoded file content with whitelisted illegal XML characters removed.

  Raises:
    ValueError: non-whitelisted illegal XML characters are found in the file.
  """
  def _ReadAs(encoding):
    """Return the whole file decoded with |encoding|."""
    with codecs.open(file_path, encoding=encoding) as stream:
      return stream.read()

  try:
    file_txt = _ReadAs('utf-8')
    if logging_text:
      logging.info('%s %s (UTF-8)', logging_text, file_path)
  except UnicodeDecodeError:
    # Not valid UTF-8: fall back to latin1.
    file_txt = _ReadAs('latin1')
    if logging_text:
      logging.info('%s %s (latin1)', logging_text, file_path)

  file_txt, char_list = _HandleIllegalXMLChars(file_txt)
  if not char_list:
    return file_txt

  raise ValueError('Illegal XML characters %s found in %s.' %
                   (char_list, file_path))
def _BuildInfo(build_info_path, filename):
  """Fetch contents of a file from portage build_info directory.

  Portage keeps a build_info directory both while an ebuild is being emerged
  and (in a different location) once it has been installed.

  Various useful data files exist there like:
  'CATEGORY', 'PF', 'SIZE', 'HOMEPAGE', 'LICENSE'

  Args:
    build_info_path: Path to the build_info directory to read from.
    filename: Name of the file to read.

  Returns:
    Contents of the file as a string with trailing whitespace stripped,
    or "".
  """
  info_file = os.path.join(build_info_path, filename)

  try:
    # Buildinfo properties we read are in US-ASCII, not Unicode.
    return osutils.ReadFile(info_file).rstrip()
  except IOError:
    # Some properties like HOMEPAGE may be absent.
    return ""
def HookPackageProcess(pkg_build_path):
  """Different entry point to populate a packageinfo.

  This is called instead of LoadPackageInfo when called by a package build.
  The package name is computed from the CATEGORY and PF build-info files,
  licenses are gathered from the 'work' directory, and the result is saved
  as license.yaml inside the build-info directory.

  Args:
    pkg_build_path: unpacked being built by emerge.
  """
  build_info_dir = os.path.join(pkg_build_path, 'build-info')
  src_dir = os.path.join(pkg_build_path, 'work')
  dump_path = os.path.join(build_info_dir, 'license.yaml')

  category = _BuildInfo(build_info_dir, 'CATEGORY')
  package_version = _BuildInfo(build_info_dir, 'PF')
  fullnamerev = '%s/%s' % (category, package_version)
  logging.debug('Computed package name %s from %s',
                fullnamerev, pkg_build_path)

  # No board is passed when running from a package build.
  pkg = PackageInfo(None, fullnamerev)
  pkg.GetLicenses(build_info_dir, src_dir)
  pkg.SaveLicenseDump(dump_path)