validate:launcher: Try to generate a backtrace on segfaults
authorThibault Saunier <thibault.saunier@osg.samsung.com>
Mon, 7 Nov 2016 20:20:09 +0000 (17:20 -0300)
committerThibault Saunier <thibault.saunier@osg.samsung.com>
Wed, 9 Nov 2016 13:19:45 +0000 (10:19 -0300)
validate/launcher/baseclasses.py
validate/launcher/utils.py

index a8c0a3c8c12b356eebb702e9d47a3ccde7c67eff..93daf6a1562b8f243ece55341e69d7e826d2a8ba 100644 (file)
@@ -30,6 +30,7 @@ import time
 from . import utils
 import signal
 import urllib.parse
+import tempfile
 import subprocess
 import threading
 import queue
@@ -40,7 +41,7 @@ from .loggable import Loggable
 import xml.etree.cElementTree as ET
 
 from .utils import mkdir, Result, Colors, printc, DEFAULT_TIMEOUT, GST_SECOND, \
-    Protocols, look_for_file_in_source_dir, get_data_file
+    Protocols, look_for_file_in_source_dir, get_data_file, BackTraceGenerator
 
 # The factor by which we increase the hard timeout when running inside
 # Valgrind
@@ -49,6 +50,10 @@ GDB_TIMEOUT_FACTOR = VALGRIND_TIMEOUT_FACTOR = 20
 VALGRIND_ERROR_CODE = 20
 
 VALIDATE_OVERRIDE_EXTENSION = ".override"
+COREDUMP_SIGNALS = [-signal.SIGQUIT, -signal.SIGILL, -signal.SIGABRT,
+    -signal.SIGFPE, -signal.SIGSEGV, -signal.SIGBUS, -signal.SIGSYS,
+    -signal.SIGTRAP, -signal.SIGXCPU, -signal.SIGXFSZ, -signal.SIGIOT,
+    139]
 
 
 class Test(Loggable):
@@ -196,22 +201,39 @@ class Test(Loggable):
         self.add_env_variable("LD_PRELOAD")
         self.add_env_variable("DISPLAY")
 
+    def add_stack_trace_to_logfile(self):
+        trace_gatherer = BackTraceGenerator.get_default()
+        stack_trace = trace_gatherer.get_trace(self)
+
+        if not stack_trace:
+            return
+
+        info = "\n\n== Segfault informations: == \n%s" % stack_trace
+        if self.options.redirect_logs:
+            print(info)
+        else:
+            with open(self.logfile, 'a') as f:
+                f.write(info)
+
     def set_result(self, result, message="", error=""):
         self.debug("Setting result: %s (message: %s, error: %s)" % (result,
                    message, error))
 
-        if result is Result.TIMEOUT and self.options.debug is True:
-            if self.options.gdb:
-                printc("Timeout, you should process <ctrl>c to get into gdb",
-                       Colors.FAIL)
-                # and wait here until gdb exits
-                self.process.communicate()
-            else:
-                pname = subprocess.check_output(("readlink -e /proc/%s/exe"
-                                                 % self.process.pid).decode().split(' ')).replace('\n', '')
-                input("%sTimeout happened you can attach gdb doing: $gdb %s %d%s\n"
+        if result is Result.TIMEOUT:
+            if self.options.debug is True:
+                if self.options.gdb:
+                    printc("Timeout, you should process <ctrl>c to get into gdb",
+                        Colors.FAIL)
+                    # and wait here until gdb exits
+                    self.process.communicate()
+                else:
+                    pname = subprocess.check_output(("readlink -e /proc/%s/exe"
+                                                    % self.process.pid).decode().split(' ')).replace('\n', '')
+                    input("%sTimeout happened you can attach gdb doing: $gdb %s %d%s\n"
                           "Press enter to continue" % (Colors.FAIL, pname, self.process.pid,
                                                        Colors.ENDC))
+            else:
+                self.add_stack_trace_to_logfile()
 
         self.result = result
         self.message = message
@@ -224,6 +246,10 @@ class Test(Loggable):
         self.debug("%s returncode: %s", self, self.process.returncode)
         if self.process.returncode == 0:
             self.set_result(Result.PASSED)
+        elif self.process.returncode in [-signal.SIGSEGV, -signal.SIGABRT, 139]:
+            result = Result.FAILED
+            msg = "Application segfaulted "
+            self.add_stack_trace_to_file()
         elif self.process.returncode == VALGRIND_ERROR_CODE:
             self.set_result(Result.FAILED, "Valgrind reported errors")
         else:
@@ -768,9 +794,10 @@ class GstValidateTest(Test):
 
         msg = ""
         result = Result.PASSED
-        if self.process.returncode == 139:
+        if self.process.returncode in COREDUMP_SIGNALS:
             result = Result.FAILED
             msg = "Application segfaulted "
+            self.add_stack_trace_to_logfile()
         elif self.process.returncode == VALGRIND_ERROR_CODE:
             msg = "Valgrind reported errors "
             result = Result.FAILED
index 6d27fc011b71b558ee63dae5a7d252428ce252be..dc10161c8d53e189deace30e3832d7f95c47b333 100644 (file)
 import config
 import os
 import re
+import shutil
+import subprocess
 import sys
-import urllib.request, urllib.parse, urllib.error
+import tempfile
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
 import urllib.parse
-import subprocess
 
+from .loggable import Loggable
 from operator import itemgetter
 
 
@@ -250,3 +256,105 @@ def get_scenarios():
     GST_VALIDATE_COMMAND = "gst-validate-1.0"
     os.system("%s --scenarios-defs-output-file %s" % (GST_VALIDATE_COMMAND,
                                                       ))
+
+
+class BackTraceGenerator(Loggable):
+    __instance = None
+    _executable_regex = re.compile(r'Executable: (.*)\n')
+    _timestamp_regex = re.compile(r'Timestamp: .*\((\d*)s ago\)')
+
+    def __init__(self):
+        Loggable.__init__(self)
+
+        self.coredumpctl = shutil.which('coredumpctl')
+        self.gdb = shutil.which('gdb')
+
+    @classmethod
+    def get_default(cls):
+        if not cls.__instance:
+            cls.__instance = BackTraceGenerator()
+
+        return cls.__instance
+
+    def get_trace(self, test):
+        if not test.process.returncode:
+            return self.get_trace_on_running_process(test)
+
+        if self.coredumpctl:
+            return self.get_trace_from_systemd(test)
+
+        self.debug("coredumpctl not present, and it is the only"
+                   " supported way to get backtraces for now.")
+        return None
+
+    def get_trace_on_running_process(self, test):
+        if not self.gdb:
+            return "Can not generate stack trace as `gdb` is not" \
+                "installed."
+
+        gdb = ['gdb', '-ex', 't a a bt', '-batch',
+            '-p', str(test.process.pid)]
+
+        try:
+            return subprocess.check_output(
+                gdb, stderr=subprocess.STDOUT).decode()
+        except Exception as e:
+            return "Could not run `gdb` on process (pid: %d):\n%s" % (
+                test.process.pid, e)
+
+    def get_trace_from_systemd(self, test):
+        for ntry in range(10):
+            if ntry != 0:
+                # Loopping, it means we conceder the logs might not be ready yet.
+                time.sleep(1)
+
+            try:
+                info = subprocess.check_output(['coredumpctl', 'info',
+                                                str(test.process.pid)],
+                                               stderr=subprocess.STDOUT)
+            except subprocess.CalledProcessError:
+                # The trace might not be ready yet
+                time.sleep(1)
+                continue
+
+            info = info.decode()
+            try:
+                executable = BackTraceGenerator._executable_regex.findall(info)[0]
+            except IndexError:
+                self.debug("Backtrace could not be found yet, trying harder.")
+                # The trace might not be ready yet
+                continue
+
+            if executable != test.application:
+                self.debug("PID: %s -- executable %s != test application: %s" % (
+                    test.process.pid, executable, test.application))
+                # The trace might not be ready yet
+                continue
+
+            if not BackTraceGenerator._timestamp_regex.findall(info):
+                self.debug("Timestamp %s is more than 1min old",
+                           re.findall(r'Timestamp: .*', info))
+                # The trace might not be ready yet
+                continue
+
+            bt_all = None
+            if self.gdb:
+                try:
+                    tf = tempfile.NamedTemporaryFile()
+                    subprocess.check_output(['coredumpctl', 'dump',
+                                            str(test.process.pid), '--output=' +
+                                            tf.name], stderr=subprocess.STDOUT)
+
+                    gdb = ['gdb', '-ex', 't a a bt', '-ex', 'quit',
+                        test.application, tf.name]
+                    bt_all = subprocess.check_output(
+                        gdb, stderr=subprocess.STDOUT).decode()
+
+                    info += "\nThread apply all bt:\n\n%s" % (
+                        bt_all.replace('\n', '\n' + 15 * ' '))
+                except Exception as e:
+                    self.debug("Could not get backtrace from gdb: %s" % e)
+
+            return info
+
+        return None