From 1a03a4b2469673e20f54722728a6aa8bb1261a61 Mon Sep 17 00:00:00 2001 From: liangcao Date: Fri, 23 Aug 2013 14:40:35 -0700 Subject: [PATCH] SPDX:real-time license scanning and SPDX output. SPDX integrates real-time license scanning, generates SPDX standard output and license verification information during the OE-Core build process. The existing module includes scanning patched packages and creating package and file level SPDX documents. (From OE-Core rev: 7a37cc81fb95d56b5ac5e5ca22a1900e45717911) Signed-off-by: liangcao Signed-off-by: Elizabeth Flanagan Signed-off-by: Saul Wold Signed-off-by: Richard Purdie --- meta/classes/spdx.bbclass | 321 ++++++++++++++++++++++++++++++++++++++++++++++ meta/conf/licenses.conf | 51 +++++++- 2 files changed, 371 insertions(+), 1 deletion(-) create mode 100644 meta/classes/spdx.bbclass diff --git a/meta/classes/spdx.bbclass b/meta/classes/spdx.bbclass new file mode 100644 index 0000000..bde6e49 --- /dev/null +++ b/meta/classes/spdx.bbclass @@ -0,0 +1,321 @@ +# This class integrates real-time license scanning, generation of SPDX standard +# output and verifying license info during the building process. +# It is a combination of efforts from the OE-Core, SPDX and Fossology projects. +# +# For more information on FOSSology: +# http://www.fossology.org +# +# For more information on FOSSologySPDX commandline: +# https://github.com/spdx-tools/fossology-spdx/wiki/Fossology-SPDX-Web-API +# +# For more information on SPDX: +# http://www.spdx.org +# + +# SPDX file will be output to the path which is defined as [SPDX_MANIFEST_DIR] +# in ./meta/conf/licenses.conf.
+
+SPDXOUTPUTDIR = "${WORKDIR}/spdx_output_dir"
+SPDXSSTATEDIR = "${WORKDIR}/spdx_sstate_dir"
+
+# do_spdx scans the patched source tree (${S}) with a FOSSology server and
+# writes the SPDX tag/value manifest ${SPDX_MANIFEST_DIR}/${PN}.spdx.
+# Scan results are cached as JSON in ${SPDXSSTATEDIR}: when the package
+# verification code is unchanged the cache is reused as is, otherwise only
+# files whose SHA1 is not in the cache are uploaded for scanning.
+python do_spdx () {
+    import os
+
+    info = {}
+    info['workdir'] = (d.getVar('WORKDIR', True) or "")
+    info['sourcedir'] = (d.getVar('S', True) or "")
+    info['pn'] = (d.getVar('PN', True) or "")
+    info['pv'] = (d.getVar('PV', True) or "")
+    info['src_uri'] = (d.getVar('SRC_URI', True) or "")
+    info['spdx_version'] = (d.getVar('SPDX_VERSION', True) or '')
+    info['data_license'] = (d.getVar('DATA_LICENSE', True) or '')
+
+    spdx_sstate_dir = (d.getVar('SPDXSSTATEDIR', True) or "")
+    manifest_dir = (d.getVar('SPDX_MANIFEST_DIR', True) or "")
+    info['outfile'] = os.path.join(manifest_dir, info['pn'] + ".spdx")
+    sstatefile = os.path.join(spdx_sstate_dir,
+        info['pn'] + info['pv'] + ".spdx")
+    info['spdx_temp_dir'] = (d.getVar('SPDX_TEMP_DIR', True) or "")
+    info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz")
+
+    ## start from an empty staging directory so that leftovers of an
+    ## interrupted run are not uploaded again
+    remove_dir_tree(info['spdx_temp_dir'])
+
+    ## get everything from cache. use it to decide if
+    ## something needs to be rerun
+    cur_ver_code = get_ver_code(info['sourcedir'])
+
+    ## every directory written to below has to exist, including the one
+    ## that receives the manifest
+    for needed_dir in (spdx_sstate_dir, info['spdx_temp_dir'], manifest_dir):
+        if needed_dir and not os.path.exists(needed_dir):
+            bb.utils.mkdirhier(needed_dir)
+
+    cached_spdx = None
+    if os.path.exists(sstatefile):
+        ## cache for this package exists. read it in
+        cached_spdx = get_cached_spdx(sstatefile)
+        ## get_cached_spdx() returns None for invalid JSON; a file of the
+        ## wrong shape is just as unusable, so both count as "no cache"
+        if not (isinstance(cached_spdx, dict)
+                and 'PackageVerificationCode' in cached_spdx
+                and isinstance(cached_spdx.get('Files'), dict)):
+            bb.warn(info['pn'] + ": ignoring unusable SPDX cache "
+                + sstatefile)
+            cached_spdx = None
+
+    if (cached_spdx is not None
+            and cached_spdx['PackageVerificationCode'] == cur_ver_code):
+        bb.note(info['pn'] + "'s ver code same as cache's. do nothing")
+        spdx_file_info = cached_spdx['Files']
+    else:
+        if cached_spdx is not None:
+            local_file_info = setup_foss_scan(info,
+                True, cached_spdx['Files'])
+        else:
+            local_file_info = setup_foss_scan(info, False, None)
+
+        ## setup fossology command
+        foss_server = (d.getVar('FOSS_SERVER', True) or "")
+        foss_flags = (d.getVar('FOSS_WGET_FLAGS', True) or "")
+        foss_command = "wget %s --post-file=%s %s"\
+            % (foss_flags, info['tar_file'], foss_server)
+
+        foss_file_info = run_fossology(foss_command)
+        spdx_file_info = create_spdx_doc(local_file_info, foss_file_info)
+        ## write to cache
+        write_cached_spdx(sstatefile, cur_ver_code, spdx_file_info)
+
+    ## Get document and package level information
+    spdx_header_info = get_header_info(info, cur_ver_code, spdx_file_info)
+
+    ## CREATE MANIFEST
+    create_manifest(info, spdx_header_info, spdx_file_info)
+
+    ## clean up the temp stuff
+    remove_dir_tree(info['spdx_temp_dir'])
+    if os.path.exists(info['tar_file']):
+        remove_file(info['tar_file'])
+}
+addtask spdx after do_patch before do_configure
+
+def create_manifest(info,header,files):
+    """
+    Write the SPDX tag/value manifest to info['outfile'].
+
+    header is the text produced by get_header_info(); files maps the SHA1
+    of each file to a dict of SPDX field name -> value for that file.
+    """
+    ## a file section is opened by its FileName tag, so the fields are
+    ## written in a fixed order rather than in arbitrary dict order
+    field_order = ['FileName', 'FileType', 'FileChecksum: SHA1',
+        'LicenseConcluded', 'LicenseInfoInFile', 'FileCopyrightText']
+    with open(info['outfile'], 'w') as f:
+        f.write(header + '\n')
+        ## sorted so that two runs over the same tree give the same file
+        for chksum in sorted(files):
+            block = files[chksum]
+            keys = [key for key in field_order if key in block]
+            keys += sorted(key for key in block if key not in field_order)
+            for key in keys:
+                f.write(key + ": " + block[key])
+                f.write('\n')
+            f.write('\n')
+
+def get_cached_spdx( sstatefile ):
+    """
+    Load the scan cache written by write_cached_spdx().
+
+    Returns the decoded JSON document, or None when the file does not
+    hold valid JSON.
+    """
+    import json
+    with open( sstatefile, 'r' ) as f:
+        try:
+            return json.load(f)
+        except ValueError:
+            return None
+
+def write_cached_spdx( sstatefile, ver_code, files ):
+    """
+    Store the package verification code and the per-file SPDX
+    information as JSON in sstatefile.
+    """
+    import json
+    spdx_doc = {
+        'PackageVerificationCode': ver_code,
+        'Files': files,
+    }
+    with open( sstatefile, 'w' ) as f:
+        json.dump(spdx_doc, f)
+
+def setup_foss_scan( info, cache, cached_files ):
+    """
+    Stage the files that need scanning and pack them into
+    info['tar_file'].
+
+    Every file below info['sourcedir'] whose SHA1 is not a key of
+    cached_files (every file when cache is False) is copied into
+    info['spdx_temp_dir'], which is then written to a gzipped tarball.
+
+    Returns a dict keyed by file SHA1.  Entries found in the cache keep
+    their cached fields, all others only hold 'FileName'.
+    """
+    import errno, shutil
+    import tarfile
+    file_info = {}
+
+    for f_dir, f in list_files( info['sourcedir'] ):
+        full_path = os.path.join( f_dir, f )
+        abs_path = os.path.join(info['sourcedir'], full_path)
+        dest_dir = os.path.join( info['spdx_temp_dir'], f_dir )
+        dest_path = os.path.join( info['spdx_temp_dir'], full_path )
+
+        checksum = hash_file( abs_path )
+        if checksum is None:
+            ## e.g. a dangling symlink or an unreadable file
+            bb.warn( "Unable to read " + abs_path + ", skipping it\n" )
+            continue
+
+        ## retain cache information if it exists
+        is_cached = bool(cache and checksum in cached_files)
+        if is_cached:
+            file_info[checksum] = cached_files[checksum]
+        else:
+            file_info[checksum] = {'FileName': full_path}
+
+        try:
+            os.makedirs( dest_dir )
+        except OSError as e:
+            if not (e.errno == errno.EEXIST and os.path.isdir(dest_dir)):
+                bb.warn( "mkdir failed " + str(e) + "\n" )
+                continue
+
+        if not is_cached:
+            try:
+                shutil.copyfile( abs_path, dest_path )
+            except (shutil.Error, IOError) as e:
+                bb.warn( str(e) + "\n" )
+
+    ## the with statement closes the archive
+    with tarfile.open( info['tar_file'], "w:gz" ) as tar:
+        tar.add( info['spdx_temp_dir'],
+            arcname=os.path.basename(info['spdx_temp_dir']) )
+
+    return file_info
+
+
+def remove_dir_tree( dir_name ):
+    """Recursively delete dir_name; failures are ignored on purpose."""
+    import shutil
+    shutil.rmtree( dir_name, ignore_errors=True )
+
+def remove_file( file_name ):
+    """Delete file_name; a failure to do so is ignored on purpose."""
+    try:
+        os.remove( file_name )
+    except OSError:
+        pass
+
+def list_files( dir ):
+    """
+    Yield (relative_dir, file_name) for every file below dir.
+    relative_dir is relative to dir, '.' for files directly inside it.
+    """
+    for root, subFolders, files in os.walk( dir ):
+        rel_root = os.path.relpath( root, dir )
+        for f in files:
+            yield rel_root, f
+
+def hash_file( file_name ):
+    """
+    Return the SHA1 hex digest of the content of file_name, or None
+    when the file cannot be opened or read.
+    """
+    import hashlib
+    sha1 = hashlib.sha1()
+    try:
+        with open( file_name, 'rb' ) as f:
+            ## read in chunks so large files are not held in memory
+            for chunk in iter(lambda: f.read(65536), b''):
+                sha1.update( chunk )
+    except (IOError, OSError):
+        return None
+    return sha1.hexdigest()
+
+def hash_string( data ):
+    """Return the SHA1 hex digest of data (text is hashed as UTF-8)."""
+    import hashlib
+    if not isinstance(data, bytes):
+        data = data.encode('utf-8')
+    return hashlib.sha1( data ).hexdigest()
+
+def run_fossology( foss_command ):
+    """
+    Run foss_command and parse the SPDX tag/value text it prints.
+
+    foss_command is split on whitespace and executed without a shell.
+
+    Returns a dict keyed by file SHA1; each value holds 'FileName',
+    'FileType', 'LicenseConcluded', 'LicenseInfoInFile' and
+    'FileCopyrightText'.  Records lacking one of these are skipped with
+    a warning.
+    """
+    import re
+    import subprocess
+
+    p = subprocess.Popen(foss_command.split(),
+        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    foss_output, foss_error = p.communicate()
+    if p.returncode != 0:
+        bb.warn("FOSSology request failed with exit code %d: %s"
+            % (p.returncode, foss_error))
+    if not isinstance(foss_output, str):
+        foss_output = foss_output.decode('utf-8', 'replace')
+    foss_output = foss_output.replace('\r', '')
+
+    ## one record per file: it runs from the FileName tag up to the
+    ## </text> that closes the FileCopyrightText value
+    records = re.findall(r'FileName:.*?</text>', foss_output, re.S)
+
+    fields = ['FileType','LicenseConcluded',
+        'LicenseInfoInFile','FileName']
+    file_info = {}
+    for rec in records:
+        try:
+            chksum = re.findall(r'FileChecksum: SHA1: (.*)\n', rec)[0]
+            entry = {}
+            entry['FileCopyrightText'] = re.findall(
+                r'FileCopyrightText: (.*?</text>)', rec, re.S)[0]
+            for field in fields:
+                entry[field] = re.findall(field + ': (.*)', rec)[0]
+        except IndexError:
+            bb.warn("Skipping incomplete FOSSology record: " + rec)
+            continue
+        file_info[chksum] = entry
+
+    return file_info
+
+def create_spdx_doc( file_info, scanned_files ):
+    """
+    Merge the FOSSology results into the local file table.
+
+    For every SHA1 of scanned_files that is also a key of file_info the
+    license fields are copied over; results for unknown SHA1s are only
+    reported.  Returns file_info, which is updated in place.
+    """
+    import json
+    ## push foss changes back into cache
+    for chksum, lic_info in scanned_files.items():
+        if chksum in file_info:
+            entry = file_info[chksum]
+            entry['FileType'] = lic_info['FileType']
+            entry['FileChecksum: SHA1'] = chksum
+            entry['LicenseInfoInFile'] = lic_info['LicenseInfoInFile']
+            entry['LicenseConcluded'] = lic_info['LicenseConcluded']
+            entry['FileCopyrightText'] = lic_info['FileCopyrightText']
+        else:
+            bb.warn(lic_info['FileName'] + " : " + chksum
+                + " : is not in the local file info: "
+                + json.dumps(lic_info,indent=1))
+    return file_info
+
+def get_ver_code( dirname ):
+    """
+    Return the SPDX package verification code of the tree below
+    dirname: the SHA1 of the concatenated, sorted SHA1s of its files.
+    Files that cannot be read are reported and left out.
+    """
+    chksums = []
+    for f_dir, f in list_files( dirname ):
+        path = os.path.join(dirname, f_dir, f)
+        chksum = hash_file( path )
+        if chksum is None:
+            bb.warn( "Unable to read " + path
+                + ", leaving it out of the verification code\n" )
+            continue
+        chksums.append( chksum )
+    ## SPDX defines the code over the checksums in ascending order; this
+    ## also makes it independent of the order os.walk() returns files in
+    ver_code_string = ''.join( sorted(chksums) ).lower()
+    ver_code = hash_string( ver_code_string )
+    return ver_code
+
+def get_header_info( info, spdx_verification_code, spdx_files ):
+    """
+    Put together the header SPDX information.
+    Eventually this needs to become a lot less
+    of a hardcoded thing.
+
+    info supplies 'spdx_version', 'data_license', 'pn', 'pv' and
+    'tar_file'; spdx_verification_code is written out as the
+    PackageVerificationCode.  spdx_files is currently not used.
+
+    Returns the document, creation and package sections as a single
+    string of newline separated tag/value lines.
+    """
+    from datetime import datetime
+    import os
+    head = []
+    DEFAULT = "NOASSERTION"
+
+    ## the tarball only exists when files were staged for a scan
+    package_checksum = None
+    if os.path.exists(info['tar_file']):
+        package_checksum = hash_file( info['tar_file'] )
+    if package_checksum is None:
+        package_checksum = DEFAULT
+
+    ## document level information
+    ## free-form text values are delimited by <text>...</text> in the
+    ## tag/value format
+    head.append("SPDXVersion: " + info['spdx_version'])
+    head.append("DataLicense: " + info['data_license'])
+    head.append("DocumentComment: <text>SPDX for "
+        + info['pn'] + " version " + info['pv'] + "</text>")
+    head.append("")
+
+    ## Creator information
+    ## SPDX wants the creation time in UTC, marked with a trailing Z
+    now = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
+    head.append("## Creation Information")
+    head.append("Creator: fossology-spdx")
+    head.append("Created: " + now)
+    head.append("CreatorComment: <text>UNO</text>")
+    head.append("")
+
+    ## package level information
+    head.append("## Package Information")
+    head.append("PackageName: " + info['pn'])
+    head.append("PackageVersion: " + info['pv'])
+    head.append("PackageDownloadLocation: " + DEFAULT)
+    head.append("PackageSummary: <text></text>")
+    head.append("PackageFileName: " + os.path.basename(info['tar_file']))
+    head.append("PackageSupplier: Person:" + DEFAULT)
+    head.append("PackageOriginator: Person:" + DEFAULT)
+    head.append("PackageChecksum: SHA1: " + package_checksum)
+    head.append("PackageVerificationCode: " + spdx_verification_code)
+    head.append("PackageDescription: <text>" + info['pn']
+        + " version " + info['pv'] + "</text>")
+    head.append("")
+    head.append("PackageCopyrightText: <text>" + DEFAULT + "</text>")
+    head.append("")
+    head.append("PackageLicenseDeclared: " + DEFAULT)
+    head.append("PackageLicenseConcluded: " + DEFAULT)
+    head.append("PackageLicenseInfoFromFiles: " + DEFAULT)
+    head.append("")
+
+    ## header for file level
+    head.append("## File Information")
+    head.append("")
+
+    return '\n'.join(head)
diff --git a/meta/conf/licenses.conf b/meta/conf/licenses.conf
index 922b84c..b41d0a8 100644
--- a/meta/conf/licenses.conf
+++ b/meta/conf/licenses.conf
@@ -113,6 +113,55
@@ SPDXLICENSEMAP[SGIv1] = "SGI-1" # Set if you want the license.manifest copied to the image #COPY_LIC_MANIFEST = "1" -# If you want the pkg licenses copied over as well you must set +# If you want the pkg licenses copied over as well you must set # both COPY_LIC_MANIFEST and COPY_LIC_DIRS #COPY_LIC_DIRS = "1" + +## SPDX temporary directory +SPDX_TEMP_DIR = "${WORKDIR}/spdx_temp" +SPDX_MANIFEST_DIR = "/home/yocto/fossology_scans" + +## SPDX Format info +SPDX_VERSION = "SPDX-1.1" +DATA_LICENSE = "CC0-1.0" + +## Fossology scan information +# You can set this option to control whether copyright information is skipped +# during the identification process. +# +# It is defined as [FOSS_COPYRIGHT] in ./meta/conf/licenses.conf. +# FOSS_COPYRIGHT = "true" +# NO copyright will be processed. That means only license information will be +# identified and output to the SPDX file. +# FOSS_COPYRIGHT = "false" +# Copyright will be identified and output to the SPDX file along with license +# information. The process will take more time than not processing copyright +# information. +# + +FOSS_COPYRIGHT = "true" + +# The option [FOSS_RECURSIVE_UNPACK], defined in ./meta/conf/licenses.conf, +# controls whether the FOSSology server recursively unpacks the tar.gz file +# that is sent from the do_spdx task. +# +# FOSS_RECURSIVE_UNPACK = "false": +# FOSSology server does NOT recursively unpack. In the current release, this +# is the default choice because recursive unpacking will not necessarily break +# down the original compressed files. +# FOSS_RECURSIVE_UNPACK = "true": +# FOSSology server recursively unpacks components. +# + +FOSS_RECURSIVE_UNPACK = "false" + +# FOSSologySPDX instance server.
+# For more information on FOSSologySPDX commandline: +# https://github.com/spdx-tools/fossology-spdx/wiki/Fossology-SPDX-Web-API +# + +FOSS_SERVER = "http://localhost//?mod=spdx_license_once&noCopyright=${FOSS_COPYRIGHT}&recursiveUnpack=${FOSS_RECURSIVE_UNPACK}" + +FOSS_WGET_FLAGS = "-qO - --no-check-certificate --timeout=0" + + -- 2.7.4