src/third_party/WebKit/Tools/Scripts/webkitpy/w3c/test_importer.py

   1 # Copyright (C) 2013 Adobe Systems Incorporated. All rights reserved.
   2 #
   3 # Redistribution and use in source and binary forms, with or without
   4 # modification, are permitted provided that the following conditions
   5 # are met:
   6 #
   7 # 1. Redistributions of source code must retain the above
   8 #    copyright notice, this list of conditions and the following
   9 #    disclaimer.
  10 # 2. Redistributions in binary form must reproduce the above
  11 #    copyright notice, this list of conditions and the following
  12 #    disclaimer in the documentation and/or other materials
  13 #    provided with the distribution.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY
  16 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  18 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
  19 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  20 # OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  21 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  22 # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  24 # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
  25 # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26 # SUCH DAMAGE.
  27
  28 """
  29  This script imports a directory of W3C tests into WebKit.
  30
  31  This script will import the tests into WebKit following these rules:
  32
  33     - By default, all tests are imported under LayoutTests/w3c/[repo-name].
  34
   35     - By default, only reftests and jstests are imported. This can be overridden
  36       with a -a or --all argument
  37
  38     - Also by default, if test files by the same name already exist in the
  39       destination directory, they are overwritten with the idea that running
  40       this script would refresh files periodically.  This can also be
  41       overridden by a -n or --no-overwrite flag
  42
  43     - All files are converted to work in WebKit:
  44          1. Paths to testharness.js and vendor-prefix.js files are modified to
  45             point to Webkit's copy of them in LayoutTests/resources, using the
  46             correct relative path from the new location.
  47          2. All CSS properties requiring the -webkit-vendor prefix are prefixed
  48             (the list of what needs prefixes is read from Source/WebCore/CSS/CSSProperties.in).
  49          3. Each reftest has its own copy of its reference file following
  50             the naming conventions new-run-webkit-tests expects.
   51          4. If a reference file lives outside the directory of the test that
  52             uses it, it is checked for paths to support files as it will be
  53             imported into a different relative position to the test file
  54             (in the same directory).
  55          5. Any tags with the class "instructions" have style="display:none" added
  56             to them. Some w3c tests contain instructions to manual testers which we
  57             want to strip out (the test result parser only recognizes pure testharness.js
  58             output and not those instructions).
  59
   60      - Upon completion, script outputs the total number of tests imported, broken
  61        down by test type
  62
  63      - Also upon completion, if we are not importing the files in place, each
  64        directory where files are imported will have a w3c-import.log file written with
  65        a timestamp, the W3C Mercurial changeset if available, the list of CSS
  66        properties used that require prefixes, the list of imported files, and
  67        guidance for future test modification and maintenance. On subsequent
  68        imports, this file is read to determine if files have been
  69        removed in the newer changesets.  The script removes these files
  70        accordingly.
  71 """
  72
   73 # FIXME: Change this file to use the Host abstractions rather than os, sys, shutil, etc.
  74
  75 import datetime
  76 import logging
  77 import mimetypes
  78 import optparse
  79 import os
  80 import shutil
  81 import sys
  82
  83 from webkitpy.common.host import Host
  84 from webkitpy.common.webkit_finder import WebKitFinder
  85 from webkitpy.common.system.executive import ScriptError
  86 from webkitpy.layout_tests.models.test_expectations import TestExpectationParser
  87 from webkitpy.w3c.test_parser import TestParser
  88 from webkitpy.w3c.test_converter import convert_for_webkit
  89
  90
# Placeholder stored as the importer's changeset before it is loaded, and
# whenever it cannot be read from Mercurial.
CHANGESET_NOT_AVAILABLE = 'Not Available'


# Module-level logger used by the functions and the importer class in this file.
_log = logging.getLogger(__name__)
  95
  96
  97 def main(_argv, _stdout, _stderr):
  98     options, args = parse_args()
  99     dir_to_import = os.path.normpath(os.path.abspath(args[0]))
 100     if len(args) == 1:
 101         top_of_repo = dir_to_import
 102     else:
 103         top_of_repo = os.path.normpath(os.path.abspath(args[1]))
 104
 105     if not os.path.exists(dir_to_import):
 106         sys.exit('Directory %s not found!' % dir_to_import)
 107     if not os.path.exists(top_of_repo):
 108         sys.exit('Repository directory %s not found!' % top_of_repo)
 109     if top_of_repo not in dir_to_import:
 110         sys.exit('Repository directory %s must be a parent of %s' % (top_of_repo, dir_to_import))
 111
 112     configure_logging()
 113     test_importer = TestImporter(Host(), dir_to_import, top_of_repo, options)
 114     test_importer.do_import()
 115
 116
 117 def configure_logging():
 118     class LogHandler(logging.StreamHandler):
 119
 120         def format(self, record):
 121             if record.levelno > logging.INFO:
 122                 return "%s: %s" % (record.levelname, record.getMessage())
 123             return record.getMessage()
 124
 125     logger = logging.getLogger()
 126     logger.setLevel(logging.INFO)
 127     handler = LogHandler()
 128     handler.setLevel(logging.INFO)
 129     logger.addHandler(handler)
 130     return handler
 131
 132
 133 def parse_args():
 134     parser = optparse.OptionParser(usage='usage: %prog [options] [dir_to_import] [top_of_repo]')
 135     parser.add_option('-n', '--no-overwrite', dest='overwrite', action='store_false', default=True,
 136         help='Flag to prevent duplicate test files from overwriting existing tests. By default, they will be overwritten.')
 137     parser.add_option('-a', '--all', action='store_true', default=False,
 138         help='Import all tests including reftests, JS tests, and manual/pixel tests. By default, only reftests and JS tests are imported.')
 139     parser.add_option('-d', '--dest-dir', dest='destination', default='w3c',
 140         help='Import into a specified directory relative to the LayoutTests root. By default, files are imported under LayoutTests/w3c.')
 141     parser.add_option('--ignore-expectations', action='store_true', default=False,
 142         help='Ignore the W3CImportExpectations file and import everything.')
 143     parser.add_option('--dry-run', action='store_true', default=False,
 144         help='Dryrun only (don\'t actually write any results).')
 145
 146     options, args = parser.parse_args()
 147     if len(args) > 2:
 148         parser.error('Incorrect number of arguments')
 149     elif len(args) == 0:
 150         args = (os.getcwd(),)
 151     return options, args
 152
 153
 154 class TestImporter(object):
 155
 156     def __init__(self, host, dir_to_import, top_of_repo, options):
 157         self.host = host
 158         self.dir_to_import = dir_to_import
 159         self.top_of_repo = top_of_repo
 160         self.options = options
 161
 162         self.filesystem = self.host.filesystem
 163         self.webkit_finder = WebKitFinder(self.filesystem)
 164         self._webkit_root = self.webkit_finder.webkit_base()
 165         self.layout_tests_dir = self.webkit_finder.path_from_webkit_base('LayoutTests')
 166         self.destination_directory = self.filesystem.normpath(self.filesystem.join(self.layout_tests_dir, options.destination,
 167                                                                                    self.filesystem.basename(self.top_of_repo)))
 168         self.import_in_place = (self.dir_to_import == self.destination_directory)
 169         self.dir_above_repo = self.filesystem.dirname(self.top_of_repo)
 170
 171         self.changeset = CHANGESET_NOT_AVAILABLE
 172
 173         self.import_list = []
 174
 175     def do_import(self):
 176         _log.info("Importing %s into %s", self.dir_to_import, self.destination_directory)
 177         self.find_importable_tests(self.dir_to_import)
 178         self.load_changeset()
 179         self.import_tests()
 180
 181     def load_changeset(self):
 182         """Returns the current changeset from mercurial or "Not Available"."""
 183         try:
 184             self.changeset = self.host.executive.run_command(['hg', 'tip']).split('changeset:')[1]
 185         except (OSError, ScriptError):
 186             self.changeset = CHANGESET_NOT_AVAILABLE
 187
 188     def find_importable_tests(self, directory):
 189         # FIXME: use filesystem
 190         paths_to_skip = self.find_paths_to_skip()
 191
 192         for root, dirs, files in os.walk(directory):
 193             cur_dir = root.replace(self.dir_above_repo + '/', '') + '/'
 194             _log.info('  scanning ' + cur_dir + '...')
 195             total_tests = 0
 196             reftests = 0
 197             jstests = 0
 198
 199             DIRS_TO_SKIP = ('.git', '.hg')
 200             if dirs:
 201                 for d in DIRS_TO_SKIP:
 202                     if d in dirs:
 203                         dirs.remove(d)
 204
 205                 for path in paths_to_skip:
 206                     path_base = path.replace(self.options.destination + '/', '')
 207                     path_base = path_base.replace(cur_dir, '')
 208                     path_full = self.filesystem.join(root, path_base)
 209                     if path_base in dirs:
 210                         dirs.remove(path_base)
 211                         if not self.options.dry_run and self.import_in_place:
 212                             _log.info("  pruning %s" % path_base)
 213                             self.filesystem.rmtree(path_full)
 214                         else:
 215                             _log.info("  skipping %s" % path_base)
 216
 217
 218             copy_list = []
 219
 220             for filename in files:
 221                 path_full = self.filesystem.join(root, filename)
 222                 path_base = path_full.replace(self.layout_tests_dir + '/', '')
 223                 if path_base in paths_to_skip:
 224                     if not self.options.dry_run and self.import_in_place:
 225                         _log.info("  pruning %s" % path_base)
 226                         self.filesystem.remove(path_full)
 227                         continue
 228                     else:
 229                         continue
 230                 # FIXME: This block should really be a separate function, but the early-continues make that difficult.
 231
 232                 if filename.startswith('.') or filename.endswith('.pl'):
 233                     continue  # For some reason the w3c repo contains random perl scripts we don't care about.
 234
 235                 fullpath = os.path.join(root, filename)
 236
 237                 mimetype = mimetypes.guess_type(fullpath)
 238                 if not 'html' in str(mimetype[0]) and not 'application/xhtml+xml' in str(mimetype[0]) and not 'application/xml' in str(mimetype[0]):
 239                     copy_list.append({'src': fullpath, 'dest': filename})
 240                     continue
 241
 242                 if root.endswith('resources'):
 243                     copy_list.append({'src': fullpath, 'dest': filename})
 244                     continue
 245
 246                 test_parser = TestParser(vars(self.options), filename=fullpath)
 247                 test_info = test_parser.analyze_test()
 248                 if test_info is None:
 249                     continue
 250
 251                 if 'reference' in test_info.keys():
 252                     reftests += 1
 253                     total_tests += 1
 254                     test_basename = os.path.basename(test_info['test'])
 255
 256                     # Add the ref file, following WebKit style.
 257                     # FIXME: Ideally we'd support reading the metadata
 258                     # directly rather than relying  on a naming convention.
 259                     # Using a naming convention creates duplicate copies of the
 260                     # reference files.
 261                     ref_file = os.path.splitext(test_basename)[0] + '-expected'
 262                     ref_file += os.path.splitext(test_basename)[1]
 263
 264                     copy_list.append({'src': test_info['reference'], 'dest': ref_file, 'reference_support_info': test_info['reference_support_info']})
 265                     copy_list.append({'src': test_info['test'], 'dest': filename})
 266
 267                 elif 'jstest' in test_info.keys():
 268                     jstests += 1
 269                     total_tests += 1
 270                     copy_list.append({'src': fullpath, 'dest': filename})
 271                 else:
 272                     total_tests += 1
 273                     copy_list.append({'src': fullpath, 'dest': filename})
 274
 275             if copy_list:
 276                 # Only add this directory to the list if there's something to import
 277                 self.import_list.append({'dirname': root, 'copy_list': copy_list,
 278                     'reftests': reftests, 'jstests': jstests, 'total_tests': total_tests})
 279
 280     def find_paths_to_skip(self):
 281         if self.options.ignore_expectations:
 282             return set()
 283
 284         paths_to_skip = set()
 285         port = self.host.port_factory.get()
 286         w3c_import_expectations_path = self.webkit_finder.path_from_webkit_base('LayoutTests', 'W3CImportExpectations')
 287         w3c_import_expectations = self.filesystem.read_text_file(w3c_import_expectations_path)
 288         parser = TestExpectationParser(port, full_test_list=(), is_lint_mode=False)
 289         expectation_lines = parser.parse(w3c_import_expectations_path, w3c_import_expectations)
 290         for line in expectation_lines:
 291             if 'SKIP' in line.expectations:
 292                 if line.specifiers:
 293                     _log.warning("W3CImportExpectations:%s should not have any specifiers" % line.line_numbers)
 294                     continue
 295                 paths_to_skip.add(line.name)
 296         return paths_to_skip
 297
 298     def import_tests(self):
 299         total_imported_tests = 0
 300         total_imported_reftests = 0
 301         total_imported_jstests = 0
 302         total_prefixed_properties = {}
 303
 304         for dir_to_copy in self.import_list:
 305             total_imported_tests += dir_to_copy['total_tests']
 306             total_imported_reftests += dir_to_copy['reftests']
 307             total_imported_jstests += dir_to_copy['jstests']
 308
 309             prefixed_properties = []
 310
 311             if not dir_to_copy['copy_list']:
 312                 continue
 313
 314             orig_path = dir_to_copy['dirname']
 315
 316             subpath = os.path.relpath(orig_path, self.top_of_repo)
 317             new_path = os.path.join(self.destination_directory, subpath)
 318
 319             if not(os.path.exists(new_path)):
 320                 os.makedirs(new_path)
 321
 322             copied_files = []
 323
 324             for file_to_copy in dir_to_copy['copy_list']:
 325                 # FIXME: Split this block into a separate function.
 326                 orig_filepath = os.path.normpath(file_to_copy['src'])
 327
 328                 if os.path.isdir(orig_filepath):
 329                     # FIXME: Figure out what is triggering this and what to do about it.
 330                     _log.error('%s refers to a directory' % orig_filepath)
 331                     continue
 332
 333                 if not(os.path.exists(orig_filepath)):
 334                     _log.warning('%s not found. Possible error in the test.', orig_filepath)
 335                     continue
 336
 337                 new_filepath = os.path.join(new_path, file_to_copy['dest'])
 338                 if 'reference_support_info' in file_to_copy.keys() and file_to_copy['reference_support_info'] != {}:
 339                     reference_support_info = file_to_copy['reference_support_info']
 340                 else:
 341                     reference_support_info = None
 342
 343                 if not(os.path.exists(os.path.dirname(new_filepath))):
 344                     if not self.import_in_place and not self.options.dry_run:
 345                         os.makedirs(os.path.dirname(new_filepath))
 346
 347                 relpath = os.path.relpath(new_filepath, self.layout_tests_dir)
 348                 if not self.options.overwrite and os.path.exists(new_filepath):
 349                     _log.info('  skipping %s' % relpath)
 350                 else:
 351                     # FIXME: Maybe doing a file diff is in order here for existing files?
 352                     # In other words, there's no sense in overwriting identical files, but
 353                     # there's no harm in copying the identical thing.
 354                     _log.info('  %s' % relpath)
 355
 356                 # Only html, xml, or css should be converted
 357                 # FIXME: Eventually, so should js when support is added for this type of conversion
 358                 mimetype = mimetypes.guess_type(orig_filepath)
 359                 if 'html' in str(mimetype[0]) or 'xml' in str(mimetype[0])  or 'css' in str(mimetype[0]):
 360                     converted_file = convert_for_webkit(new_path, filename=orig_filepath, reference_support_info=reference_support_info)
 361
 362                     if not converted_file:
 363                         if not self.import_in_place and not self.options.dry_run:
 364                             shutil.copyfile(orig_filepath, new_filepath)  # The file was unmodified.
 365                     else:
 366                         for prefixed_property in converted_file[0]:
 367                             total_prefixed_properties.setdefault(prefixed_property, 0)
 368                             total_prefixed_properties[prefixed_property] += 1
 369
 370                         prefixed_properties.extend(set(converted_file[0]) - set(prefixed_properties))
 371                         if not self.options.dry_run:
 372                             outfile = open(new_filepath, 'wb')
 373                             outfile.write(converted_file[1])
 374                             outfile.close()
 375                 else:
 376                     if not self.import_in_place and not self.options.dry_run:
 377                         shutil.copyfile(orig_filepath, new_filepath)
 378
 379                 copied_files.append(new_filepath.replace(self._webkit_root, ''))
 380
 381         _log.info('')
 382         _log.info('Import complete')
 383         _log.info('')
 384         _log.info('IMPORTED %d TOTAL TESTS', total_imported_tests)
 385         _log.info('Imported %d reftests', total_imported_reftests)
 386         _log.info('Imported %d JS tests', total_imported_jstests)
 387         _log.info('Imported %d pixel/manual tests', total_imported_tests - total_imported_jstests - total_imported_reftests)
 388         _log.info('')
 389
 390         if total_prefixed_properties:
 391             _log.info('Properties needing prefixes (by count):')
 392             for prefixed_property in sorted(total_prefixed_properties, key=lambda p: total_prefixed_properties[p]):
 393                 _log.info('  %s: %s', prefixed_property, total_prefixed_properties[prefixed_property])
 394
 395     def setup_destination_directory(self):
 396         """ Creates a destination directory that mirrors that of the source directory """
 397
 398         new_subpath = self.dir_to_import[len(self.top_of_repo):]
 399
 400         destination_directory = os.path.join(self.destination_directory, new_subpath)
 401
 402         if not os.path.exists(destination_directory):
 403             os.makedirs(destination_directory)
 404
 405         _log.info('Tests will be imported into: %s', destination_directory)