[lit] Remove ANSI control characters from xunit output
author Alex Richardson <Alexander.Richardson@cl.cam.ac.uk>
Thu, 6 Aug 2020 07:53:47 +0000 (08:53 +0100)
committer Alex Richardson <Alexander.Richardson@cl.cam.ac.uk>
Thu, 6 Aug 2020 08:16:52 +0000 (09:16 +0100)
Failing test output sometimes contains control characters like \x1b (e.g.
if there was some -fcolor-diagnostics output) which are not allowed inside
XML files. This causes problems with CI systems: for example, the Jenkins
JUnit XML will throw an exception when encountering those characters and
similar problems also occur with GitLab CI.

Reviewed By: yln, jdenny

Differential Revision: https://reviews.llvm.org/D84233

llvm/utils/lit/lit/reports.py
llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt [new file with mode: 0644]
llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py [new file with mode: 0644]
llvm/utils/lit/tests/shtest-format.py

index b43f779..3d4aecf 100755 (executable)
@@ -68,6 +68,20 @@ class JsonReport(object):
             file.write('\n')
 
 
+_invalid_xml_chars_dict = {c: None for c in range(32) if chr(c) not in ('\t', '\n', '\r')}
+
+
+def remove_invalid_xml_chars(s):
+    # According to the XML 1.0 spec, control characters other than
+    # \t, \r, and \n are not permitted anywhere in the document
+    # (https://www.w3.org/TR/xml/#charsets) and therefore this function
+    # removes them to produce a valid XML document.
+    #
+    # Note: In XML 1.1 only \0 is illegal (https://www.w3.org/TR/xml11/#charsets)
+    # but lit currently produces XML 1.0 output.
+    return s.translate(_invalid_xml_chars_dict)
+
+
 class XunitReport(object):
     def __init__(self, output_file):
         self.output_file = output_file
@@ -113,7 +127,15 @@ class XunitReport(object):
             # terminator we wrap it by creating a new CDATA block.
             output = test.result.output.replace(']]>', ']]]]><![CDATA[>')
             if isinstance(output, bytes):
-                output.decode("utf-8", 'ignore')
+                output = output.decode("utf-8", 'ignore')
+
+            # Failing test output sometimes contains control characters like
+            # \x1b (e.g. if there was some -fcolor-diagnostics output) which are
+            # not allowed inside XML files.
+            # This causes problems with CI systems: for example, the Jenkins
+            # JUnit XML will throw an exception when encountering those
+            # characters and similar problems also occur with GitLab CI.
+            output = remove_invalid_xml_chars(output)
             file.write(output)
             file.write(']]></failure>\n</testcase>\n')
         elif test.result.code in self.skipped_codes:
diff --git a/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/fail_with_control_chars.txt
new file mode 100644 (file)
index 0000000..70d5b61
--- /dev/null
@@ -0,0 +1,5 @@
+# Run a command that fails and prints control characters on stdout.
+# This test checks that the xunit output correctly removes them from the XML.
+#
+# RUN: %{python} %S/write-control-chars.py
+
diff --git a/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py b/llvm/utils/lit/tests/Inputs/shtest-format/external_shell/write-control-chars.py
new file mode 100644 (file)
index 0000000..34f08a0
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import sys
+
+print("a line with \x1b[2;30;41mcontrol characters\x1b[0m.")
+sys.exit(1)
index aa74a2c..5c48397 100644 (file)
 # CHECK-NEXT: a line with bad encoding:
 # CHECK: --
 
+# CHECK: FAIL: shtest-format :: external_shell/fail_with_control_chars.txt
+# CHECK-NEXT: *** TEST 'shtest-format :: external_shell/fail_with_control_chars.txt' FAILED ***
+# CHECK: Command Output (stdout):
+# CHECK-NEXT: --
+# CHECK-NEXT: a line with {{.*}}control characters{{.*}}.
+# CHECK: --
+
 # CHECK: PASS: shtest-format :: external_shell/pass.txt
 
 # CHECK: FAIL: shtest-format :: fail.txt
 # CHECK-NEXT: true
 # CHECK-NEXT: --
 
-# CHECK: Failed Tests (3)
+# CHECK: Failed Tests (4)
 # CHECK: shtest-format :: external_shell/fail.txt
 # CHECK: shtest-format :: external_shell/fail_with_bad_encoding.txt
+# CHECK: shtest-format :: external_shell/fail_with_control_chars.txt
 # CHECK: shtest-format :: fail.txt
 
 # CHECK: Unexpectedly Passed Tests (1)
 # CHECK: Passed             : 6
 # CHECK: Expectedly Failed  : 4
 # CHECK: Unresolved         : 3
-# CHECK: Failed             : 3
+# CHECK: Failed             : 4
 # CHECK: Unexpectedly Passed: 1
 
 
 # XUNIT: <?xml version="1.0" encoding="UTF-8"?>
 # XUNIT-NEXT: <testsuites time="{{[0-9.]+}}">
-# XUNIT-NEXT: <testsuite name="shtest-format" tests="21" failures="7" skipped="4">
+# XUNIT-NEXT: <testsuite name="shtest-format" tests="22" failures="8" skipped="4">
 
 # XUNIT: <testcase classname="shtest-format.external_shell" name="fail.txt" time="{{[0-9]+\.[0-9]+}}">
 # XUNIT-NEXT: <failure{{[ ]*}}>
 # XUNIT: </failure>
 # XUNIT-NEXT: </testcase>
 
+# XUNIT: <testcase classname="shtest-format.external_shell" name="fail_with_control_chars.txt" time="{{[0-9]+\.[0-9]+}}">
+# XUNIT-NEXT: <failure><![CDATA[Script:
+# XUNIT: Command Output (stdout):
+# XUNIT-NEXT: --
+# XUNIT-NEXT: a line with [2;30;41mcontrol characters[0m.
+# XUNIT: </failure>
+# XUNIT-NEXT: </testcase>
+
 # XUNIT: <testcase classname="shtest-format.external_shell" name="pass.txt" time="{{[0-9]+\.[0-9]+}}"/>
 
 # XUNIT: <testcase classname="shtest-format.shtest-format" name="fail.txt" time="{{[0-9]+\.[0-9]+}}">