pm-graph v5.10
author: Todd Brandt <todd.e.brandt@intel.com>
Thu, 20 Oct 2022 09:23:10 +0000 (02:23 -0700)
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tue, 25 Oct 2022 15:46:15 +0000 (17:46 +0200)
sleepgraph:
- add -wifitrace argument for tracing all the way to wifi reconnect
- include more data in ftrace to mark the end of kernel resume
- add async_synchronize_full to the list of funcs to chart
- add thermal zone info to the log data
- include a check for s0ix support (s2idle is the default mem_sleep)
- if s2idle does not support s0ix, remove the SYS%LPI turbostat var
- fix -dev crash when kprobe caller is just an address (not a symbol)
- fix the cpuexec data in -proc to display in resume

sleepgraph.8:
- add -wifitrace documentation

README:
- change links from 01.org to developer.intel.com

Signed-off-by: Todd Brandt <todd.e.brandt@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
tools/power/pm-graph/README
tools/power/pm-graph/sleepgraph.8
tools/power/pm-graph/sleepgraph.py

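diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index e6020c0..3213dbe 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README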
@@ -6,22 +6,22 @@
    |_|                    |___/          |_|
 
    pm-graph: suspend/resume/boot timing analysis tools
-    Version: 5.9
+    Version: 5.10
      Author: Todd Brandt <todd.e.brandt@intel.com>
-  Home Page: https://01.org/pm-graph
+  Home Page: https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html
 
  Report bugs/issues at bugzilla.kernel.org Tools/pm-graph
        - https://bugzilla.kernel.org/buglist.cgi?component=pm-graph&product=Tools
 
  Full documentation available online & in man pages
        - Getting Started:
-         https://01.org/pm-graph/documentation/getting-started
+         https://www.intel.com/content/www/us/en/developer/articles/technical/usage.html
 
-       - Config File Format:
-         https://01.org/pm-graph/documentation/3-config-file-format
+       - Feature Summary:
+         https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/features.html
 
        - upstream version in git:
-         https://github.com/intel/pm-graph/
+         git clone https://github.com/intel/pm-graph/
 
  Table of Contents
        - Overview
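diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 5126271..643271b 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8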
@@ -78,6 +78,9 @@ This helps maintain the consistency of test data for better comparison.
 If a wifi connection is available, check that it reconnects after resume. Include
 the reconnect time in the total resume time calculation and treat wifi timeouts
 as resume failures.
+.TP
+\fB-wifitrace\fR
+Trace through the wifi reconnect time and include it in the timeline.
 
 .SS "advanced"
 .TP
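diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 33981ad..cfe3433 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py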
@@ -86,7 +86,7 @@ def ascii(text):
 #       store system values and test parameters
 class SystemValues:
        title = 'SleepGraph'
-       version = '5.9'
+       version = '5.10'
        ansi = False
        rs = 0
        display = ''
@@ -100,6 +100,7 @@ class SystemValues:
        ftracelog = False
        acpidebug = True
        tstat = True
+       wifitrace = False
        mindevlen = 0.0001
        mincglen = 0.0
        cgphase = ''
@@ -124,6 +125,7 @@ class SystemValues:
        epath = '/sys/kernel/debug/tracing/events/power/'
        pmdpath = '/sys/power/pm_debug_messages'
        s0ixpath = '/sys/module/intel_pmc_core/parameters/warn_on_s0ix_failures'
+       s0ixres = '/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us'
        acpipath='/sys/module/acpi/parameters/debug_level'
        traceevents = [
                'suspend_resume',
@@ -180,6 +182,7 @@ class SystemValues:
        tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f'
        tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f'
        tracefuncs = {
+               'async_synchronize_full': {},
                'sys_sync': {},
                'ksys_sync': {},
                '__pm_notifier_call_chain': {},
@@ -304,6 +307,7 @@ class SystemValues:
                [2, 'suspendstats', 'sh', '-c', 'grep -v invalid /sys/power/suspend_stats/*'],
                [2, 'cpuidle', 'sh', '-c', 'grep -v invalid /sys/devices/system/cpu/cpu*/cpuidle/state*/s2idle/*'],
                [2, 'battery', 'sh', '-c', 'grep -v invalid /sys/class/power_supply/*/*'],
+               [2, 'thermal', 'sh', '-c', 'grep . /sys/class/thermal/thermal_zone*/temp'],
        ]
        cgblacklist = []
        kprobes = dict()
@@ -777,7 +781,7 @@ class SystemValues:
                        return
                if not quiet:
                        sysvals.printSystemInfo(False)
-                       pprint('INITIALIZING FTRACE...')
+                       pprint('INITIALIZING FTRACE')
                # turn trace off
                self.fsetVal('0', 'tracing_on')
                self.cleanupFtrace()
@@ -841,7 +845,7 @@ class SystemValues:
                                for name in self.dev_tracefuncs:
                                        self.defaultKprobe(name, self.dev_tracefuncs[name])
                        if not quiet:
-                               pprint('INITIALIZING KPROBES...')
+                               pprint('INITIALIZING KPROBES')
                        self.addKprobes(self.verbose)
                if(self.usetraceevents):
                        # turn trace events on
@@ -1133,6 +1137,15 @@ class SystemValues:
                                self.cfgdef[file] = fp.read().strip()
                        fp.write(value)
                        fp.close()
+       def s0ixSupport(self):
+               if not os.path.exists(self.s0ixres) or not os.path.exists(self.mempowerfile):
+                       return False
+               fp = open(sysvals.mempowerfile, 'r')
+               data = fp.read().strip()
+               fp.close()
+               if '[s2idle]' in data:
+                       return True
+               return False
        def haveTurbostat(self):
                if not self.tstat:
                        return False
@@ -1146,7 +1159,7 @@ class SystemValues:
                        self.vprint(out)
                        return True
                return False
-       def turbostat(self):
+       def turbostat(self, s0ixready):
                cmd = self.getExec('turbostat')
                rawout = keyline = valline = ''
                fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile)
@@ -1173,6 +1186,8 @@ class SystemValues:
                for key in keyline:
                        idx = keyline.index(key)
                        val = valline[idx]
+                       if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val):
+                               continue
                        out.append('%s=%s' % (key, val))
                return '|'.join(out)
        def netfixon(self, net='both'):
@@ -1183,14 +1198,6 @@ class SystemValues:
                out = ascii(fp.read()).strip()
                fp.close()
                return out
-       def wifiRepair(self):
-               out = self.netfixon('wifi')
-               if not out or 'error' in out.lower():
-                       return ''
-               m = re.match('WIFI \S* ONLINE (?P<action>\S*)', out)
-               if not m:
-                       return 'dead'
-               return m.group('action')
        def wifiDetails(self, dev):
                try:
                        info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip()
@@ -1220,11 +1227,6 @@ class SystemValues:
                                return '%s reconnected %.2f' % \
                                        (self.wifiDetails(dev), max(0, time.time() - start))
                        time.sleep(0.01)
-               if self.netfix:
-                       res = self.wifiRepair()
-                       if res:
-                               timeout = max(0, time.time() - start)
-                               return '%s %s %d' % (self.wifiDetails(dev), res, timeout)
                return '%s timeout %d' % (self.wifiDetails(dev), timeout)
        def errorSummary(self, errinfo, msg):
                found = False
@@ -1346,6 +1348,20 @@ class SystemValues:
                        for i in self.rslist:
                                self.setVal(self.rstgt, i)
                        pprint('runtime suspend settings restored on %d devices' % len(self.rslist))
+       def start(self, pm):
+               if self.useftrace:
+                       self.dlog('start ftrace tracing')
+                       self.fsetVal('1', 'tracing_on')
+                       if self.useprocmon:
+                               self.dlog('start the process monitor')
+                               pm.start()
+       def stop(self, pm):
+               if self.useftrace:
+                       if self.useprocmon:
+                               self.dlog('stop the process monitor')
+                               pm.stop()
+                       self.dlog('stop ftrace tracing')
+                       self.fsetVal('0', 'tracing_on')
 
 sysvals = SystemValues()
 switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
@@ -1643,19 +1659,20 @@ class Data:
                ubiquitous = False
                if kprobename in dtf and 'ub' in dtf[kprobename]:
                        ubiquitous = True
-               title = cdata+' '+rdata
-               mstr = '\(.*\) *(?P<args>.*) *\((?P<caller>.*)\+.* arg1=(?P<ret>.*)'
-               m = re.match(mstr, title)
-               if m:
-                       c = m.group('caller')
-                       a = m.group('args').strip()
-                       r = m.group('ret')
+               mc = re.match('\(.*\) *(?P<args>.*)', cdata)
+               mr = re.match('\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata)
+               if mc and mr:
+                       c = mr.group('caller').split('+')[0]
+                       a = mc.group('args').strip()
+                       r = mr.group('ret')
                        if len(r) > 6:
                                r = ''
                        else:
                                r = 'ret=%s ' % r
                        if ubiquitous and c in dtf and 'ub' in dtf[c]:
                                return False
+               else:
+                       return False
                color = sysvals.kprobeColor(kprobename)
                e = DevFunction(displayname, a, c, r, start, end, ubiquitous, proc, pid, color)
                tgtdev['src'].append(e)
@@ -1772,6 +1789,14 @@ class Data:
                                                e.time = self.trimTimeVal(e.time, t0, dT, left)
                                                e.end = self.trimTimeVal(e.end, t0, dT, left)
                                                e.length = e.end - e.time
+                               if('cpuexec' in d):
+                                       cpuexec = dict()
+                                       for e in d['cpuexec']:
+                                               c0, cN = e
+                                               c0 = self.trimTimeVal(c0, t0, dT, left)
+                                               cN = self.trimTimeVal(cN, t0, dT, left)
+                                               cpuexec[(c0, cN)] = d['cpuexec'][e]
+                                       d['cpuexec'] = cpuexec
                for dir in ['suspend', 'resume']:
                        list = []
                        for e in self.errorinfo[dir]:
@@ -2086,75 +2111,43 @@ class Data:
                return d
        def addProcessUsageEvent(self, name, times):
                # get the start and end times for this process
-               maxC = 0
-               tlast = 0
-               start = -1
-               end = -1
+               cpuexec = dict()
+               tlast = start = end = -1
                for t in sorted(times):
-                       if tlast == 0:
+                       if tlast < 0:
                                tlast = t
                                continue
-                       if name in self.pstl[t]:
-                               if start == -1 or tlast < start:
+                       if name in self.pstl[t] and self.pstl[t][name] > 0:
+                               if start < 0:
                                        start = tlast
-                               if end == -1 or t > end:
-                                       end = t
+                               end, key = t, (tlast, t)
+                               maxj = (t - tlast) * 1024.0
+                               cpuexec[key] = min(1.0, float(self.pstl[t][name]) / maxj)
                        tlast = t
-               if start == -1 or end == -1:
-                       return 0
+               if start < 0 or end < 0:
+                       return
                # add a new action for this process and get the object
                out = self.newActionGlobal(name, start, end, -3)
-               if not out:
-                       return 0
-               phase, devname = out
-               dev = self.dmesg[phase]['list'][devname]
-               # get the cpu exec data
-               tlast = 0
-               clast = 0
-               cpuexec = dict()
-               for t in sorted(times):
-                       if tlast == 0 or t <= start or t > end:
-                               tlast = t
-                               continue
-                       list = self.pstl[t]
-                       c = 0
-                       if name in list:
-                               c = list[name]
-                       if c > maxC:
-                               maxC = c
-                       if c != clast:
-                               key = (tlast, t)
-                               cpuexec[key] = c
-                               tlast = t
-                               clast = c
-               dev['cpuexec'] = cpuexec
-               return maxC
+               if out:
+                       phase, devname = out
+                       dev = self.dmesg[phase]['list'][devname]
+                       dev['cpuexec'] = cpuexec
        def createProcessUsageEvents(self):
-               # get an array of process names
-               proclist = []
-               for t in sorted(self.pstl):
-                       pslist = self.pstl[t]
-                       for ps in sorted(pslist):
-                               if ps not in proclist:
-                                       proclist.append(ps)
-               # get a list of data points for suspend and resume
-               tsus = []
-               tres = []
+               # get an array of process names and times
+               proclist = {'sus': dict(), 'res': dict()}
+               tdata = {'sus': [], 'res': []}
                for t in sorted(self.pstl):
-                       if t < self.tSuspended:
-                               tsus.append(t)
-                       else:
-                               tres.append(t)
+                       dir = 'sus' if t < self.tSuspended else 'res'
+                       for ps in sorted(self.pstl[t]):
+                               if ps not in proclist[dir]:
+                                       proclist[dir][ps] = 0
+                       tdata[dir].append(t)
                # process the events for suspend and resume
-               if len(proclist) > 0:
+               if len(proclist['sus']) > 0 or len(proclist['res']) > 0:
                        sysvals.vprint('Process Execution:')
-               for ps in proclist:
-                       c = self.addProcessUsageEvent(ps, tsus)
-                       if c > 0:
-                               sysvals.vprint('%25s (sus): %d' % (ps, c))
-                       c = self.addProcessUsageEvent(ps, tres)
-                       if c > 0:
-                               sysvals.vprint('%25s (res): %d' % (ps, c))
+               for dir in ['sus', 'res']:
+                       for ps in sorted(proclist[dir]):
+                               self.addProcessUsageEvent(ps, tdata[dir])
        def handleEndMarker(self, time, msg=''):
                dm = self.dmesg
                self.setEnd(time, msg)
@@ -3218,7 +3211,7 @@ class ProcessMonitor:
 #       markers, and/or kprobes required for primary parsing.
 def doesTraceLogHaveTraceEvents():
        kpcheck = ['_cal: (', '_ret: (']
-       techeck = ['suspend_resume', 'device_pm_callback']
+       techeck = ['suspend_resume', 'device_pm_callback', 'tracing_mark_write']
        tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
        sysvals.usekprobes = False
        fp = sysvals.openlog(sysvals.ftracefile, 'r')
@@ -3241,7 +3234,7 @@ def doesTraceLogHaveTraceEvents():
                                check.remove(i)
                tmcheck = check
        fp.close()
-       sysvals.usetraceevents = True if len(techeck) < 2 else False
+       sysvals.usetraceevents = True if len(techeck) < 3 else False
        sysvals.usetracemarkers = True if len(tmcheck) == 0 else False
 
 # Function: appendIncompleteTraceLog
@@ -3456,6 +3449,8 @@ def parseTraceLog(live=False):
                        continue
                # process cpu exec line
                if t.type == 'tracing_mark_write':
+                       if t.name == 'CMD COMPLETE' and data.tKernRes == 0:
+                               data.tKernRes = t.time
                        m = re.match(tp.procexecfmt, t.name)
                        if(m):
                                parts, msg = 1, m.group('ps')
@@ -3674,6 +3669,9 @@ def parseTraceLog(live=False):
                                e = next((x for x in reversed(tp.ktemp[key]) if x['end'] < 0), 0)
                                if not e:
                                        continue
+                               if (t.time - e['begin']) * 1000 < sysvals.mindevlen:
+                                       tp.ktemp[key].pop()
+                                       continue
                                e['end'] = t.time
                                e['rdata'] = kprobedata
                                # end of kernel resume
@@ -4213,6 +4211,8 @@ def callgraphHTML(sv, hf, num, cg, title, color, devid):
                        fmt = '<n>(%.3f ms @ '+sv.timeformat+')</n>'
                        flen = fmt % (line.length*1000, line.time)
                if line.isLeaf():
+                       if line.length * 1000 < sv.mincglen:
+                               continue
                        hf.write(html_func_leaf.format(line.name, flen))
                elif line.freturn:
                        hf.write(html_func_end)
@@ -4827,14 +4827,11 @@ def createHTML(testruns, testfail):
                                        if('cpuexec' in dev):
                                                for t in sorted(dev['cpuexec']):
                                                        start, end = t
-                                                       j = float(dev['cpuexec'][t]) / 5
-                                                       if j > 1.0:
-                                                               j = 1.0
                                                        height = '%.3f' % (rowheight/3)
                                                        top = '%.3f' % (rowtop + devtl.scaleH + 2*rowheight/3)
                                                        left = '%f' % (((start-m0)*100)/mTotal)
                                                        width = '%f' % ((end-start)*100/mTotal)
-                                                       color = 'rgba(255, 0, 0, %f)' % j
+                                                       color = 'rgba(255, 0, 0, %f)' % dev['cpuexec'][t]
                                                        devtl.html += \
                                                                html_cpuexec.format(left, top, height, width, color)
                                        if('src' not in dev):
@@ -5453,17 +5450,9 @@ def executeSuspend(quiet=False):
                call('sync', shell=True)
        sv.dlog('read dmesg')
        sv.initdmesg()
-       # start ftrace
-       if sv.useftrace:
-               if not quiet:
-                       pprint('START TRACING')
-               sv.dlog('start ftrace tracing')
-               sv.fsetVal('1', 'tracing_on')
-               if sv.useprocmon:
-                       sv.dlog('start the process monitor')
-                       pm.start()
-       sv.dlog('run the cmdinfo list before')
+       sv.dlog('cmdinfo before')
        sv.cmdinfo(True)
+       sv.start(pm)
        # execute however many s/r runs requested
        for count in range(1,sv.execcount+1):
                # x2delay in between test runs
@@ -5500,6 +5489,7 @@ def executeSuspend(quiet=False):
                        if res != 0:
                                tdata['error'] = 'cmd returned %d' % res
                else:
+                       s0ixready = sv.s0ixSupport()
                        mode = sv.suspendmode
                        if sv.memmode and os.path.exists(sv.mempowerfile):
                                mode = 'mem'
@@ -5509,9 +5499,10 @@ def executeSuspend(quiet=False):
                                sv.testVal(sv.diskpowerfile, 'radio', sv.diskmode)
                        if sv.acpidebug:
                                sv.testVal(sv.acpipath, 'acpi', '0xe')
-                       if mode == 'freeze' and sv.haveTurbostat():
+                       if ((mode == 'freeze') or (sv.memmode == 's2idle')) \
+                               and sv.haveTurbostat():
                                # execution will pause here
-                               turbo = sv.turbostat()
+                               turbo = sv.turbostat(s0ixready)
                                if turbo:
                                        tdata['turbo'] = turbo
                        else:
@@ -5522,7 +5513,8 @@ def executeSuspend(quiet=False):
                                        pf.close()
                                except Exception as e:
                                        tdata['error'] = str(e)
-               sv.dlog('system returned from resume')
+               sv.fsetVal('CMD COMPLETE', 'trace_marker')
+               sv.dlog('system returned')
                # reset everything
                sv.testVal('restoreall')
                if(sv.rtcwake):
@@ -5535,33 +5527,29 @@ def executeSuspend(quiet=False):
                        sv.fsetVal('WAIT END', 'trace_marker')
                # return from suspend
                pprint('RESUME COMPLETE')
-               sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+               if(count < sv.execcount):
+                       sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+               elif(not sv.wifitrace):
+                       sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+                       sv.stop(pm)
                if sv.wifi and wifi:
                        tdata['wifi'] = sv.pollWifi(wifi)
                        sv.dlog('wifi check, %s' % tdata['wifi'])
-                       if sv.netfix:
-                               netfixout = sv.netfixon('wired')
-               elif sv.netfix:
-                       netfixout = sv.netfixon()
-               if sv.netfix and netfixout:
-                       tdata['netfix'] = netfixout
+               if(count == sv.execcount and sv.wifitrace):
+                       sv.fsetVal(datetime.now().strftime(sv.tmend), 'trace_marker')
+                       sv.stop(pm)
+               if sv.netfix:
+                       tdata['netfix'] = sv.netfixon()
                        sv.dlog('netfix, %s' % tdata['netfix'])
                if(sv.suspendmode == 'mem' or sv.suspendmode == 'command'):
                        sv.dlog('read the ACPI FPDT')
                        tdata['fw'] = getFPDT(False)
                testdata.append(tdata)
-       sv.dlog('run the cmdinfo list after')
+       sv.dlog('cmdinfo after')
        cmdafter = sv.cmdinfo(False)
-       # stop ftrace
-       if sv.useftrace:
-               if sv.useprocmon:
-                       sv.dlog('stop the process monitor')
-                       pm.stop()
-               sv.fsetVal('0', 'tracing_on')
        # grab a copy of the dmesg output
        if not quiet:
                pprint('CAPTURING DMESG')
-       sysvals.dlog('EXECUTION TRACE END')
        sv.getdmesg(testdata)
        # grab a copy of the ftrace output
        if sv.useftrace:
@@ -6350,6 +6338,8 @@ def data_from_html(file, outpath, issues, fulldetail=False):
                if not m:
                        continue
                name, time, phase = m.group('n'), m.group('t'), m.group('p')
+               if name == 'async_synchronize_full':
+                       continue
                if ' async' in name or ' sync' in name:
                        name = ' '.join(name.split(' ')[:-1])
                if phase.startswith('suspend'):
@@ -6701,6 +6691,7 @@ def printHelp():
        '   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
        '   -result fn   Export a results table to a text file for parsing.\n'\
        '   -wifi        If a wifi connection is available, check that it reconnects after resume.\n'\
+       '   -wifitrace   Trace kernel execution through wifi reconnect.\n'\
        '   -netfix      Use netfix to reset the network in the event it fails to resume.\n'\
        '  [testprep]\n'\
        '   -sync        Sync the filesystems before starting the test\n'\
@@ -6828,6 +6819,8 @@ if __name__ == '__main__':
                        sysvals.sync = True
                elif(arg == '-wifi'):
                        sysvals.wifi = True
+               elif(arg == '-wifitrace'):
+                       sysvals.wifitrace = True
                elif(arg == '-netfix'):
                        sysvals.netfix = True
                elif(arg == '-gzip'):