From 54be909aa08de1f9622b5f3d199f5d3b65bb29a4 Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Thu, 28 Feb 2019 19:16:17 +0000 Subject: [PATCH] Add Support for Creating and Deleting Unicode Files and Directories in Lit This enables lit to work with unicode file names via mkdir, rm, and redirection. Lit still uses utf-8 internally, but converts to utf-16 on Windows, or just utf-8 bytes on everything else. Committed on behalf of Jason Mittertreiner Differential Revision: https://reviews.llvm.org/D56754 llvm-svn: 355122 --- llvm/utils/lit/lit/TestRunner.py | 25 ++++++++++++++++------ llvm/utils/lit/lit/util.py | 14 ++++++++++++ .../lit/tests/Inputs/shtest-shell/rm-unicode-0.txt | 7 ++++++ llvm/utils/lit/tests/shtest-shell.py | 1 + 4 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 2e885b4..55d16b3 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -23,7 +23,7 @@ from lit.ShCommands import GlobItem import lit.ShUtil as ShUtil import lit.Test as Test import lit.util -from lit.util import to_bytes, to_string +from lit.util import to_bytes, to_string, to_unicode from lit.BooleanExpression import BooleanExpression class InternalShellError(Exception): @@ -344,8 +344,11 @@ def executeBuiltinMkdir(cmd, cmd_shenv): stderr = StringIO() exitCode = 0 for dir in args: + cwd = cmd_shenv.cwd + dir = to_unicode(dir) if kIsWindows else to_bytes(dir) + cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) if not os.path.isabs(dir): - dir = os.path.realpath(os.path.join(cmd_shenv.cwd, dir)) + dir = os.path.realpath(os.path.join(cwd, dir)) if parent: lit.util.mkdir_p(dir) else: @@ -598,8 +601,11 @@ def executeBuiltinRm(cmd, cmd_shenv): stderr = StringIO() exitCode = 0 for path in args: + cwd = cmd_shenv.cwd + path = to_unicode(path) if kIsWindows else to_bytes(path) + cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) if not os.path.isabs(path): - path = os.path.realpath(os.path.join(cmd_shenv.cwd, path)) + path = os.path.realpath(os.path.join(cwd, path)) if force and not os.path.exists(path): continue try: @@ -695,6 +701,8 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): else: # Make sure relative paths are relative to the cwd. redir_filename = os.path.join(cmd_shenv.cwd, name) + redir_filename = to_unicode(redir_filename) \ + if kIsWindows else to_bytes(redir_filename) fd = open(redir_filename, mode) # Workaround a Win32 and/or subprocess bug when appending. # @@ -1096,11 +1104,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): for i, ln in enumerate(commands): commands[i] = re.sub(kPdbgRegex, ": '\\1'; ", ln) if test.config.pipefail: - f.write('set -o pipefail;') + f.write(b'set -o pipefail;' if mode == 'wb' else 'set -o pipefail;') if litConfig.echo_all_commands: - f.write('set -x;') - f.write('{ ' + '; } &&\n{ '.join(commands) + '; }') - f.write('\n') + f.write(b'set -x;' if mode == 'wb' else 'set -x;') + if sys.version_info > (3,0) and mode == 'wb': + f.write(bytes('{ ' + '; } &&\n{ '.join(commands) + '; }', 'utf-8')) + else: + f.write('{ ' + '; } &&\n{ '.join(commands) + '; }') + f.write(b'\n' if mode == 'wb' else '\n') f.close() if isWin32CMDEXE: diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py index 58b5563..44de061 100644 --- a/llvm/utils/lit/lit/util.py +++ b/llvm/utils/lit/lit/util.py @@ -102,6 +102,20 @@ def to_string(b): raise TypeError('not sure how to convert %s to %s' % (type(b), str)) +def to_unicode(s): + """Return the parameter as type which supports unicode, possibly decoding + it. + + In Python2, this is the unicode type. In Python3 it's the str type. + + """ + if isinstance(s, bytes): + # In Python2, this branch is taken for both 'str' and 'bytes'. + # In Python3, this branch is taken only for 'bytes'. + return s.decode('utf-8') + return s + + def detectCPUs(): """Detects the number of CPUs on a system. diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt new file mode 100644 index 0000000..e34c96c --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-shell/rm-unicode-0.txt @@ -0,0 +1,7 @@ +# Check removing unicode +# +# RUN: mkdir -p Output/中文 +# RUN: echo "" > Output/中文/你好.txt +# RUN: rm Output/中文/你好.txt +# RUN: echo "" > Output/中文/你好.txt +# RUN: rm -r Output/中文 diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py index c725e8a..f947525 100644 --- a/llvm/utils/lit/tests/shtest-shell.py +++ b/llvm/utils/lit/tests/shtest-shell.py @@ -224,6 +224,7 @@ # CHECK: Exit Code: 1 # CHECK: *** +# CHECK: PASS: shtest-shell :: rm-unicode-0.txt # CHECK: PASS: shtest-shell :: sequencing-0.txt # CHECK: XFAIL: shtest-shell :: sequencing-1.txt # CHECK: PASS: shtest-shell :: valid-shell.txt -- 2.7.4