From 2152ae985c28ff10e6f4d4b674ae9b144ecb04c0 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Mon, 16 Sep 2019 21:22:29 +0000 Subject: [PATCH] [lit] Make internal diff work in pipelines When using lit's internal shell, RUN lines like the following accidentally execute an external `diff` instead of lit's internal `diff`: ``` # RUN: program | diff file - # RUN: not diff file1 file2 | FileCheck %s ``` Such cases exist now, in `clang/test/Analysis` for example. We are preparing patches to ensure lit's internal `diff` is called in such cases, which will then fail because lit's internal `diff` cannot currently be used in pipelines and doesn't recognize `-` as a command-line option. To enable pipelines, this patch moves lit's `diff` implementation into an out-of-process script, similar to lit's `cat` implementation. A follow-up patch will implement `-` to mean stdin. Reviewed By: probinson, stella.stamenova Differential Revision: https://reviews.llvm.org/D66574 llvm-svn: 372035 --- llvm/utils/lit/lit/TestRunner.py | 224 +------------------- llvm/utils/lit/lit/builtin_commands/diff.py | 228 +++++++++++++++++++++ .../lit/tests/Inputs/shtest-shell/diff-error-0.txt | 3 - .../lit/tests/Inputs/shtest-shell/diff-pipes.txt | 15 ++ llvm/utils/lit/tests/shtest-shell.py | 47 +++-- 5 files changed, 276 insertions(+), 241 deletions(-) create mode 100644 llvm/utils/lit/lit/builtin_commands/diff.py delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/diff-pipes.txt diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 60c6979..4946d1d 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -1,7 +1,5 @@ from __future__ import absolute_import -import difflib import errno -import functools import io import itertools import getopt @@ -361,218 +359,6 @@ def executeBuiltinMkdir(cmd, cmd_shenv): exitCode = 1 return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) -def executeBuiltinDiff(cmd, cmd_shenv): - """executeBuiltinDiff - Compare files line by line.""" - args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] - try: - opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"]) - except getopt.GetoptError as err: - raise InternalShellError(cmd, "Unsupported: 'diff': %s" % str(err)) - - filelines, filepaths, dir_trees = ([] for i in range(3)) - ignore_all_space = False - ignore_space_change = False - unified_diff = False - recursive_diff = False - strip_trailing_cr = False - for o, a in opts: - if o == "-w": - ignore_all_space = True - elif o == "-b": - ignore_space_change = True - elif o == "-u": - unified_diff = True - elif o == "-r": - recursive_diff = True - elif o == "--strip-trailing-cr": - strip_trailing_cr = True - else: - assert False, "unhandled option" - - if len(args) != 2: - raise InternalShellError(cmd, "Error: missing or extra operand") - - def getDirTree(path, basedir=""): - # Tree is a tuple of form (dirname, child_trees). - # An empty dir has child_trees = [], a file has child_trees = None. - child_trees = [] - for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)): - for child_dir in child_dirs: - child_trees.append(getDirTree(child_dir, dirname)) - for filename in files: - child_trees.append((filename, None)) - return path, sorted(child_trees) - - def compareTwoFiles(filepaths): - compare_bytes = False - encoding = None - filelines = [] - for file in filepaths: - try: - with open(file, 'r') as f: - filelines.append(f.readlines()) - except UnicodeDecodeError: - try: - with io.open(file, 'r', encoding="utf-8") as f: - filelines.append(f.readlines()) - encoding = "utf-8" - except: - compare_bytes = True - - if compare_bytes: - return compareTwoBinaryFiles(filepaths) - else: - return compareTwoTextFiles(filepaths, encoding) - - def compareTwoBinaryFiles(filepaths): - filelines = [] - for file in filepaths: - with open(file, 'rb') as f: - filelines.append(f.readlines()) - - exitCode = 0 - if hasattr(difflib, 'diff_bytes'): - # python 3.5 or newer - diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode()) - diffs = [diff.decode() for diff in diffs] - else: - # python 2.7 - func = difflib.unified_diff if unified_diff else difflib.context_diff - diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1]) - - for diff in diffs: - stdout.write(diff) - exitCode = 1 - return exitCode - - def compareTwoTextFiles(filepaths, encoding): - filelines = [] - for file in filepaths: - if encoding is None: - with open(file, 'r') as f: - filelines.append(f.readlines()) - else: - with io.open(file, 'r', encoding=encoding) as f: - filelines.append(f.readlines()) - - exitCode = 0 - def compose2(f, g): - return lambda x: f(g(x)) - - f = lambda x: x - if strip_trailing_cr: - f = compose2(lambda line: line.rstrip('\r'), f) - if ignore_all_space or ignore_space_change: - ignoreSpace = lambda line, separator: separator.join(line.split()) - ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if ignore_all_space else ' ') - f = compose2(ignoreAllSpaceOrSpaceChange, f) - - for idx, lines in enumerate(filelines): - filelines[idx]= [f(line) for line in lines] - - func = difflib.unified_diff if unified_diff else difflib.context_diff - for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]): - stdout.write(diff) - exitCode = 1 - return exitCode - - def printDirVsFile(dir_path, file_path): - if os.path.getsize(file_path): - msg = "File %s is a directory while file %s is a regular file" - else: - msg = "File %s is a directory while file %s is a regular empty file" - stdout.write(msg % (dir_path, file_path) + "\n") - - def printFileVsDir(file_path, dir_path): - if os.path.getsize(file_path): - msg = "File %s is a regular file while file %s is a directory" - else: - msg = "File %s is a regular empty file while file %s is a directory" - stdout.write(msg % (file_path, dir_path) + "\n") - - def printOnlyIn(basedir, path, name): - stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name)) - - def compareDirTrees(dir_trees, base_paths=["", ""]): - # Dirnames of the trees are not checked, it's caller's responsibility, - # as top-level dirnames are always different. Base paths are important - # for doing os.walk, but we don't put it into tree's dirname in order - # to speed up string comparison below and while sorting in getDirTree. - left_tree, right_tree = dir_trees[0], dir_trees[1] - left_base, right_base = base_paths[0], base_paths[1] - - # Compare two files or report file vs. directory mismatch. - if left_tree[1] is None and right_tree[1] is None: - return compareTwoFiles([os.path.join(left_base, left_tree[0]), - os.path.join(right_base, right_tree[0])]) - - if left_tree[1] is None and right_tree[1] is not None: - printFileVsDir(os.path.join(left_base, left_tree[0]), - os.path.join(right_base, right_tree[0])) - return 1 - - if left_tree[1] is not None and right_tree[1] is None: - printDirVsFile(os.path.join(left_base, left_tree[0]), - os.path.join(right_base, right_tree[0])) - return 1 - - # Compare two directories via recursive use of compareDirTrees. - exitCode = 0 - left_names = [node[0] for node in left_tree[1]] - right_names = [node[0] for node in right_tree[1]] - l, r = 0, 0 - while l < len(left_names) and r < len(right_names): - # Names are sorted in getDirTree, rely on that order. - if left_names[l] < right_names[r]: - exitCode = 1 - printOnlyIn(left_base, left_tree[0], left_names[l]) - l += 1 - elif left_names[l] > right_names[r]: - exitCode = 1 - printOnlyIn(right_base, right_tree[0], right_names[r]) - r += 1 - else: - exitCode |= compareDirTrees([left_tree[1][l], right_tree[1][r]], - [os.path.join(left_base, left_tree[0]), - os.path.join(right_base, right_tree[0])]) - l += 1 - r += 1 - - # At least one of the trees has ended. Report names from the other tree. - while l < len(left_names): - exitCode = 1 - printOnlyIn(left_base, left_tree[0], left_names[l]) - l += 1 - while r < len(right_names): - exitCode = 1 - printOnlyIn(right_base, right_tree[0], right_names[r]) - r += 1 - return exitCode - - stderr = StringIO() - stdout = StringIO() - exitCode = 0 - try: - for file in args: - if not os.path.isabs(file): - file = os.path.realpath(os.path.join(cmd_shenv.cwd, file)) - - if recursive_diff: - dir_trees.append(getDirTree(file)) - else: - filepaths.append(file) - - if not recursive_diff: - exitCode = compareTwoFiles(filepaths) - else: - exitCode = compareDirTrees(dir_trees) - - except IOError as err: - stderr.write("Error: 'diff' command failed, %s\n" % str(err)) - exitCode = 1 - - return ShellCommandResult(cmd, stdout.getvalue(), stderr.getvalue(), exitCode, False) - def executeBuiltinRm(cmd, cmd_shenv): """executeBuiltinRm - Removes (deletes) files or directories.""" args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] @@ -838,14 +624,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): results.append(cmdResult) return cmdResult.exitCode - if cmd.commands[0].args[0] == 'diff': - if len(cmd.commands) != 1: - raise InternalShellError(cmd.commands[0], "Unsupported: 'diff' " - "cannot be part of a pipeline") - cmdResult = executeBuiltinDiff(cmd.commands[0], shenv) - results.append(cmdResult) - return cmdResult.exitCode - if cmd.commands[0].args[0] == 'rm': if len(cmd.commands) != 1: raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' " @@ -866,7 +644,7 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): stderrTempFiles = [] opened_files = [] named_temp_files = [] - builtin_commands = set(['cat']) + builtin_commands = set(['cat', 'diff']) builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands") # To avoid deadlock, we use a single stderr stream for piped # output. This is null until we have seen some output using diff --git a/llvm/utils/lit/lit/builtin_commands/diff.py b/llvm/utils/lit/lit/builtin_commands/diff.py new file mode 100644 index 0000000..885b425 --- /dev/null +++ b/llvm/utils/lit/lit/builtin_commands/diff.py @@ -0,0 +1,228 @@ +import difflib +import functools +import getopt +import os +import sys + +class DiffFlags(): + def __init__(self): + self.ignore_all_space = False + self.ignore_space_change = False + self.unified_diff = False + self.recursive_diff = False + self.strip_trailing_cr = False + +def getDirTree(path, basedir=""): + # Tree is a tuple of form (dirname, child_trees). + # An empty dir has child_trees = [], a file has child_trees = None. + child_trees = [] + for dirname, child_dirs, files in os.walk(os.path.join(basedir, path)): + for child_dir in child_dirs: + child_trees.append(getDirTree(child_dir, dirname)) + for filename in files: + child_trees.append((filename, None)) + return path, sorted(child_trees) + +def compareTwoFiles(flags, filepaths): + compare_bytes = False + encoding = None + filelines = [] + for file in filepaths: + try: + with open(file, 'r') as f: + filelines.append(f.readlines()) + except UnicodeDecodeError: + try: + with io.open(file, 'r', encoding="utf-8") as f: + filelines.append(f.readlines()) + encoding = "utf-8" + except: + compare_bytes = True + + if compare_bytes: + return compareTwoBinaryFiles(flags, filepaths) + else: + return compareTwoTextFiles(flags, filepaths, encoding) + +def compareTwoBinaryFiles(flags, filepaths): + filelines = [] + for file in filepaths: + with open(file, 'rb') as f: + filelines.append(f.readlines()) + + exitCode = 0 + if hasattr(difflib, 'diff_bytes'): + # python 3.5 or newer + diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode()) + diffs = [diff.decode() for diff in diffs] + else: + # python 2.7 + if flags.unified_diff: + func = difflib.unified_diff + else: + func = difflib.context_diff + diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1]) + + for diff in diffs: + sys.stdout.write(diff) + exitCode = 1 + return exitCode + +def compareTwoTextFiles(flags, filepaths, encoding): + filelines = [] + for file in filepaths: + if encoding is None: + with open(file, 'r') as f: + filelines.append(f.readlines()) + else: + with io.open(file, 'r', encoding=encoding) as f: + filelines.append(f.readlines()) + + exitCode = 0 + def compose2(f, g): + return lambda x: f(g(x)) + + f = lambda x: x + if flags.strip_trailing_cr: + f = compose2(lambda line: line.rstrip('\r'), f) + if flags.ignore_all_space or flags.ignore_space_change: + ignoreSpace = lambda line, separator: separator.join(line.split()) + ignoreAllSpaceOrSpaceChange = functools.partial(ignoreSpace, separator='' if flags.ignore_all_space else ' ') + f = compose2(ignoreAllSpaceOrSpaceChange, f) + + for idx, lines in enumerate(filelines): + filelines[idx]= [f(line) for line in lines] + + func = difflib.unified_diff if flags.unified_diff else difflib.context_diff + for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1]): + sys.stdout.write(diff) + exitCode = 1 + return exitCode + +def printDirVsFile(dir_path, file_path): + if os.path.getsize(file_path): + msg = "File %s is a directory while file %s is a regular file" + else: + msg = "File %s is a directory while file %s is a regular empty file" + sys.stdout.write(msg % (dir_path, file_path) + "\n") + +def printFileVsDir(file_path, dir_path): + if os.path.getsize(file_path): + msg = "File %s is a regular file while file %s is a directory" + else: + msg = "File %s is a regular empty file while file %s is a directory" + sys.stdout.write(msg % (file_path, dir_path) + "\n") + +def printOnlyIn(basedir, path, name): + sys.stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name)) + +def compareDirTrees(flags, dir_trees, base_paths=["", ""]): + # Dirnames of the trees are not checked, it's caller's responsibility, + # as top-level dirnames are always different. Base paths are important + # for doing os.walk, but we don't put it into tree's dirname in order + # to speed up string comparison below and while sorting in getDirTree. + left_tree, right_tree = dir_trees[0], dir_trees[1] + left_base, right_base = base_paths[0], base_paths[1] + + # Compare two files or report file vs. directory mismatch. + if left_tree[1] is None and right_tree[1] is None: + return compareTwoFiles(flags, + [os.path.join(left_base, left_tree[0]), + os.path.join(right_base, right_tree[0])]) + + if left_tree[1] is None and right_tree[1] is not None: + printFileVsDir(os.path.join(left_base, left_tree[0]), + os.path.join(right_base, right_tree[0])) + return 1 + + if left_tree[1] is not None and right_tree[1] is None: + printDirVsFile(os.path.join(left_base, left_tree[0]), + os.path.join(right_base, right_tree[0])) + return 1 + + # Compare two directories via recursive use of compareDirTrees. + exitCode = 0 + left_names = [node[0] for node in left_tree[1]] + right_names = [node[0] for node in right_tree[1]] + l, r = 0, 0 + while l < len(left_names) and r < len(right_names): + # Names are sorted in getDirTree, rely on that order. + if left_names[l] < right_names[r]: + exitCode = 1 + printOnlyIn(left_base, left_tree[0], left_names[l]) + l += 1 + elif left_names[l] > right_names[r]: + exitCode = 1 + printOnlyIn(right_base, right_tree[0], right_names[r]) + r += 1 + else: + exitCode |= compareDirTrees(flags, + [left_tree[1][l], right_tree[1][r]], + [os.path.join(left_base, left_tree[0]), + os.path.join(right_base, right_tree[0])]) + l += 1 + r += 1 + + # At least one of the trees has ended. Report names from the other tree. + while l < len(left_names): + exitCode = 1 + printOnlyIn(left_base, left_tree[0], left_names[l]) + l += 1 + while r < len(right_names): + exitCode = 1 + printOnlyIn(right_base, right_tree[0], right_names[r]) + r += 1 + return exitCode + +def main(argv): + args = argv[1:] + try: + opts, args = getopt.gnu_getopt(args, "wbur", ["strip-trailing-cr"]) + except getopt.GetoptError as err: + sys.stderr.write("Unsupported: 'diff': %s\n" % str(err)) + sys.exit(1) + + flags = DiffFlags() + filelines, filepaths, dir_trees = ([] for i in range(3)) + for o, a in opts: + if o == "-w": + flags.ignore_all_space = True + elif o == "-b": + flags.ignore_space_change = True + elif o == "-u": + flags.unified_diff = True + elif o == "-r": + flags.recursive_diff = True + elif o == "--strip-trailing-cr": + flags.strip_trailing_cr = True + else: + assert False, "unhandled option" + + if len(args) != 2: + sys.stderr.write("Error: missing or extra operand\n") + sys.exit(1) + + exitCode = 0 + try: + for file in args: + if not os.path.isabs(file): + file = os.path.realpath(os.path.join(os.getcwd(), file)) + + if flags.recursive_diff: + dir_trees.append(getDirTree(file)) + else: + filepaths.append(file) + + if not flags.recursive_diff: + exitCode = compareTwoFiles(flags, filepaths) + else: + exitCode = compareDirTrees(flags, dir_trees) + + except IOError as err: + sys.stderr.write("Error: 'diff' command failed, %s\n" % str(err)) + exitCode = 1 + + sys.exit(exitCode) + +if __name__ == "__main__": + main(sys.argv) diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt deleted file mode 100644 index 81888cf..0000000 --- a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-error-0.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Check error on a unsupported diff (cannot be part of a pipeline). -# -# RUN: diff diff-error-0.txt diff-error-0.txt | echo Output diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/diff-pipes.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-pipes.txt new file mode 100644 index 0000000..ce0abca --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-shell/diff-pipes.txt @@ -0,0 +1,15 @@ +# RUN: echo foo > %t.foo +# RUN: echo bar > %t.bar + +# Check output pipe. +# RUN: diff %t.foo %t.foo | FileCheck -allow-empty -check-prefix=EMPTY %s +# RUN: diff -u %t.foo %t.bar | FileCheck %s && false || true + +# Fail so lit will print output. +# RUN: false + +# CHECK: @@ +# CHECK-NEXT: -foo +# CHECK-NEXT: +bar + +# EMPTY-NOT: {{.}} diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py index f947525..3978e44 100644 --- a/llvm/utils/lit/tests/shtest-shell.py +++ b/llvm/utils/lit/tests/shtest-shell.py @@ -34,28 +34,20 @@ # CHECK: error: command failed with exit status: 127 # CHECK: *** -# CHECK: FAIL: shtest-shell :: diff-error-0.txt -# CHECK: *** TEST 'shtest-shell :: diff-error-0.txt' FAILED *** -# CHECK: $ "diff" "diff-error-0.txt" "diff-error-0.txt" -# CHECK: # command stderr: -# CHECK: Unsupported: 'diff' cannot be part of a pipeline -# CHECK: error: command failed with exit status: 127 -# CHECK: *** - # CHECK: FAIL: shtest-shell :: diff-error-1.txt # CHECK: *** TEST 'shtest-shell :: diff-error-1.txt' FAILED *** # CHECK: $ "diff" "-B" "temp1.txt" "temp2.txt" # CHECK: # command stderr: # CHECK: Unsupported: 'diff': option -B not recognized -# CHECK: error: command failed with exit status: 127 +# CHECK: error: command failed with exit status: 1 # CHECK: *** # CHECK: FAIL: shtest-shell :: diff-error-2.txt # CHECK: *** TEST 'shtest-shell :: diff-error-2.txt' FAILED *** # CHECK: $ "diff" "temp.txt" # CHECK: # command stderr: -# CHECK: Error: missing or extra operand -# CHECK: error: command failed with exit status: 127 +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 1 # CHECK: *** # CHECK: FAIL: shtest-shell :: diff-error-3.txt @@ -82,18 +74,43 @@ # CHECK: *** TEST 'shtest-shell :: diff-error-5.txt' FAILED *** # CHECK: $ "diff" # CHECK: # command stderr: -# CHECK: Error: missing or extra operand -# CHECK: error: command failed with exit status: 127 +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 1 # CHECK: *** # CHECK: FAIL: shtest-shell :: diff-error-6.txt # CHECK: *** TEST 'shtest-shell :: diff-error-6.txt' FAILED *** # CHECK: $ "diff" # CHECK: # command stderr: -# CHECK: Error: missing or extra operand -# CHECK: error: command failed with exit status: 127 +# CHECK: Error: missing or extra operand +# CHECK: error: command failed with exit status: 1 # CHECK: *** + +# CHECK: FAIL: shtest-shell :: diff-pipes.txt + +# CHECK: *** TEST 'shtest-shell :: diff-pipes.txt' FAILED *** + +# CHECK: $ "diff" "{{[^"]*}}.foo" "{{[^"]*}}.foo" +# CHECK-NOT: note +# CHECK-NOT: error +# CHECK: $ "FileCheck" +# CHECK-NOT: note +# CHECK-NOT: error + +# CHECK: $ "diff" "-u" "{{[^"]*}}.foo" "{{[^"]*}}.bar" +# CHECK: note: command had no output on stdout or stderr +# CHECK: error: command failed with exit status: 1 +# CHECK: $ "FileCheck" +# CHECK-NOT: note +# CHECK-NOT: error +# CHECK: $ "true" + +# CHECK: $ "false" + +# CHECK: *** + + # CHECK: FAIL: shtest-shell :: diff-r-error-0.txt # CHECK: *** TEST 'shtest-shell :: diff-r-error-0.txt' FAILED *** # CHECK: $ "diff" "-r" -- 2.7.4