Merge pull request #11171 from codingforfun:fix_11143
[platform/upstream/opencv.git] / modules / ts / misc / trace_profiler.py
1 from __future__ import print_function
2
3 import os
4 import sys
5 import csv
6 from pprint import pprint
7 from collections import deque
8
9 # trace.hpp
10 REGION_FLAG_IMPL_MASK = 15 << 16;
11 REGION_FLAG_IMPL_IPP = 1 << 16;
12 REGION_FLAG_IMPL_OPENCL = 2 << 16;
13
14 DEBUG = False
15
16 if DEBUG:
17     dprint = print
18     dpprint = pprint
19 else:
20     def dprint(args, **kwargs):
21         pass
22     def dpprint(args, **kwargs):
23         pass
24
25 def tryNum(s):
26     if s.startswith('0x'):
27         try:
28             return int(s, 16)
29         except ValueError:
30             pass
31     try:
32         return int(s)
33     except ValueError:
34         pass
35     if sys.version_info[0] < 3:
36         try:
37             return long(s)
38         except ValueError:
39             pass
40     return s
41
42 def formatTimestamp(t):
43     return "%.3f" % (t * 1e-6)
44
45 try:
46     from statistics import median
47 except ImportError:
48     def median(lst):
49         sortedLst = sorted(lst)
50         lstLen = len(lst)
51         index = (lstLen - 1) // 2
52         if (lstLen % 2):
53             return sortedLst[index]
54         else:
55             return (sortedLst[index] + sortedLst[index + 1]) * 0.5
56
57 def getCXXFunctionName(spec):
58     def dropParams(spec):
59         pos = len(spec) - 1
60         depth = 0
61         while pos >= 0:
62             if spec[pos] == ')':
63                 depth = depth + 1
64             elif spec[pos] == '(':
65                 depth = depth - 1
66                 if depth == 0:
67                     if pos == 0 or spec[pos - 1] in ['#', ':']:
68                         res = dropParams(spec[pos+1:-1])
69                         return (spec[:pos] + res[0], res[1])
70                     return (spec[:pos], spec[pos:])
71             pos = pos - 1
72         return (spec, '')
73
74     def extractName(spec):
75         pos = len(spec) - 1
76         inName = False
77         while pos >= 0:
78             if spec[pos] == ' ':
79                 if inName:
80                     return spec[pos+1:]
81             elif spec[pos].isalnum():
82                 inName = True
83             pos = pos - 1
84         return spec
85
86     if spec.startswith('IPP') or spec.startswith('OpenCL'):
87         prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
88         prefix = spec[:prefix_size]
89         if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
90             prefix = prefix + spec[prefix_size]
91             prefix_size = prefix_size + 1
92         begin = prefix_size
93         while begin < len(spec):
94             if spec[begin].isalnum() or spec[begin] in ['_', ':']:
95                 break
96             begin = begin + 1
97         if begin == len(spec):
98             return spec
99         end = begin
100         while end < len(spec):
101             if not (spec[end].isalnum() or spec[end] in ['_', ':']):
102                 break
103             end = end + 1
104         return prefix + spec[begin:end]
105
106     spec = spec.replace(') const', ')') # const methods
107     (ret_type_name, params) = dropParams(spec)
108     name = extractName(ret_type_name)
109     if 'operator' in name:
110         return name + params
111     if name.startswith('&'):
112         return name[1:]
113     return name
114
115 stack_size = 10
116
117 class Trace:
118     def __init__(self, filename=None):
119         self.tasks = {}
120         self.tasks_list = []
121         self.locations = {}
122         self.threads_stack = {}
123         self.pending_files = deque()
124         if filename:
125             self.load(filename)
126
127     class TraceTask:
128         def __init__(self, threadID, taskID, locationID, beginTimestamp):
129             self.threadID = threadID
130             self.taskID = taskID
131             self.locationID = locationID
132             self.beginTimestamp = beginTimestamp
133             self.endTimestamp = None
134             self.parentTaskID = None
135             self.parentThreadID = None
136             self.childTask = []
137             self.selfTimeIPP = 0
138             self.selfTimeOpenCL = 0
139             self.totalTimeIPP = 0
140             self.totalTimeOpenCL = 0
141
142         def __repr__(self):
143             return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
144                 self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
145                 self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
146
147
148     class TraceLocation:
149         def __init__(self, locationID, filename, line, name, flags):
150             self.locationID = locationID
151             self.filename = os.path.split(filename)[1]
152             self.line = line
153             self.name = getCXXFunctionName(name)
154             self.flags = flags
155
156         def __str__(self):
157             return "{}#{}:{}".format(self.name, self.filename, self.line)
158
159         def __repr__(self):
160             return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
161
162     def parse_file(self, filename):
163         dprint("Process file: '{}'".format(filename))
164         with open(filename) as infile:
165             for line in infile:
166                 line = str(line).strip()
167                 if line[0] == "#":
168                     if line.startswith("#thread file:"):
169                         name = str(line.split(':', 1)[1]).strip()
170                         self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
171                     continue
172                 self.parse_line(line)
173
174     def parse_line(self, line):
175         opts = line.split(',')
176         dpprint(opts)
177         if opts[0] == 'l':
178             opts = list(csv.reader([line]))[0]  # process quote more
179             locationID = int(opts[1])
180             filename = str(opts[2])
181             line = int(opts[3])
182             name = opts[4]
183             flags = tryNum(opts[5])
184             self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
185             return
186         extra_opts = {}
187         for e in opts[5:]:
188             if not '=' in e:
189                 continue
190             (k, v) = e.split('=')
191             extra_opts[k] = tryNum(v)
192         if extra_opts:
193             dpprint(extra_opts)
194         threadID = None
195         taskID = None
196         locationID = None
197         ts = None
198         if opts[0] in ['b', 'e']:
199             threadID = int(opts[1])
200             taskID = int(opts[4])
201             locationID = int(opts[3])
202             ts = tryNum(opts[2])
203         thread_stack = None
204         currentTask = (None, None)
205         if threadID is not None:
206             if not threadID in self.threads_stack:
207                 thread_stack = deque()
208                 self.threads_stack[threadID] = thread_stack
209             else:
210                 thread_stack = self.threads_stack[threadID]
211             currentTask = None if not thread_stack else thread_stack[-1]
212         t = (threadID, taskID)
213         if opts[0] == 'b':
214             assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
215             task = self.TraceTask(threadID, taskID, locationID, ts)
216             self.tasks[t] = task
217             self.tasks_list.append(task)
218             thread_stack.append((threadID, taskID))
219             if currentTask:
220                 task.parentThreadID = currentTask[0]
221                 task.parentTaskID = currentTask[1]
222             if 'parentThread' in extra_opts:
223                 task.parentThreadID = extra_opts['parentThread']
224             if 'parent' in extra_opts:
225                 task.parentTaskID = extra_opts['parent']
226         if opts[0] == 'e':
227             task = self.tasks[t]
228             task.endTimestamp = ts
229             if 'tIPP' in extra_opts:
230                 task.selfTimeIPP = extra_opts['tIPP']
231             if 'tOCL' in extra_opts:
232                 task.selfTimeOpenCL = extra_opts['tOCL']
233             thread_stack.pop()
234
235     def load(self, filename):
236         self.pending_files.append(filename)
237         if DEBUG:
238             with open(filename, 'r') as f:
239                 print(f.read(), end='')
240         while self.pending_files:
241             self.parse_file(self.pending_files.pop())
242
243     def getParentTask(self, task):
244         return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
245
246     def process(self):
247         self.tasks_list.sort(key=lambda x: x.beginTimestamp)
248
249         parallel_for_location = None
250         for (id, l) in self.locations.items():
251             if l.name == 'parallel_for':
252                 parallel_for_location = l.locationID
253                 break
254
255         for task in self.tasks_list:
256             try:
257                 task.duration = task.endTimestamp - task.beginTimestamp
258                 task.selfDuration = task.duration
259             except:
260                 task.duration = None
261                 task.selfDuration = None
262             task.totalTimeIPP = task.selfTimeIPP
263             task.totalTimeOpenCL = task.selfTimeOpenCL
264
265         dpprint(self.tasks)
266         dprint("Calculate total times")
267
268         for task in self.tasks_list:
269             parentTask = self.getParentTask(task)
270             if parentTask:
271                 parentTask.selfDuration = parentTask.selfDuration - task.duration
272                 parentTask.childTask.append(task)
273                 timeIPP = task.selfTimeIPP
274                 timeOpenCL = task.selfTimeOpenCL
275                 while parentTask:
276                     if parentTask.locationID == parallel_for_location:  # TODO parallel_for
277                         break
278                     parentLocation = self.locations[parentTask.locationID]
279                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
280                         parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
281                         timeIPP = 0
282                     else:
283                         parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
284                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
285                         parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
286                         timeOpenCL = 0
287                     else:
288                         parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
289                     parentTask = self.getParentTask(parentTask)
290
291         dpprint(self.tasks)
292         dprint("Calculate total times (parallel_for)")
293
294         for task in self.tasks_list:
295             if task.locationID == parallel_for_location:
296                 task.selfDuration = 0
297                 childDuration = sum([t.duration for t in task.childTask])
298                 if task.duration == 0 or childDuration == 0:
299                     continue
300                 timeCoef = task.duration / float(childDuration)
301                 childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
302                 childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
303                 if childTimeIPP == 0 and childTimeOpenCL == 0:
304                     continue
305                 timeIPP = childTimeIPP * timeCoef
306                 timeOpenCL = childTimeOpenCL * timeCoef
307                 parentTask = task
308                 while parentTask:
309                     parentLocation = self.locations[parentTask.locationID]
310                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
311                         parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
312                         timeIPP = 0
313                     else:
314                         parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
315                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
316                         parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
317                         timeOpenCL = 0
318                     else:
319                         parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
320                     parentTask = self.getParentTask(parentTask)
321
322         dpprint(self.tasks)
323         dprint("Done")
324
325     def dump(self, max_entries):
326         assert isinstance(max_entries, int)
327
328         class CallInfo():
329             def __init__(self, callID):
330                 self.callID = callID
331                 self.totalTimes = []
332                 self.selfTimes = []
333                 self.threads = set()
334                 self.selfTimesIPP = []
335                 self.selfTimesOpenCL = []
336                 self.totalTimesIPP = []
337                 self.totalTimesOpenCL = []
338
339         calls = {}
340
341         for currentTask in self.tasks_list:
342             task = currentTask
343             callID = []
344             for i in range(stack_size):
345                 callID.append(task.locationID)
346                 task = self.getParentTask(task)
347                 if not task:
348                     break
349             callID = tuple(callID)
350             if not callID in calls:
351                 call = CallInfo(callID)
352                 calls[callID] = call
353             else:
354                 call = calls[callID]
355             call.totalTimes.append(currentTask.duration)
356             call.selfTimes.append(currentTask.selfDuration)
357             call.threads.add(currentTask.threadID)
358             call.selfTimesIPP.append(currentTask.selfTimeIPP)
359             call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
360             call.totalTimesIPP.append(currentTask.totalTimeIPP)
361             call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
362
363         dpprint(self.tasks)
364         dpprint(self.locations)
365         dpprint(calls)
366
367         calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
368         calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
369         calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
370         calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
371
372         calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
373         calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
374
375         calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
376         calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
377
378         if max_entries > 0 and len(calls_sorted) > max_entries:
379             calls_sorted = calls_sorted[:max_entries]
380
381         def formatPercents(p):
382             if p is not None:
383                 return "{:>3d}".format(int(p*100))
384             return ''
385
386         name_width = 70
387         timestamp_width = 12
388         def fmtTS():
389             return '{:>' + str(timestamp_width) + '}'
390         fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
391         fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
392         print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
393         print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
394         for (index, callID) in enumerate(calls_sorted):
395             call_self_times = calls[callID].selfTimes
396             loc0 = self.locations[callID[0]]
397             loc_array = []  # [str(callID)]
398             for (i, l) in enumerate(callID):
399                 loc = self.locations[l]
400                 loc_array.append(loc.name if i > 0 else str(loc))
401             loc_str = '|'.join(loc_array)
402             if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
403             print(fmt.format(index + 1, loc_str, len(call_self_times),
404                     len(calls[callID].threads),
405                     formatTimestamp(min(call_self_times)),
406                     formatTimestamp(max(call_self_times)),
407                     formatTimestamp(calls_median[callID]),
408                     formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
409                     formatTimestamp(sum(call_self_times)),
410                     formatTimestamp(calls_self_sum_IPP[callID]),
411                     formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
412                     formatTimestamp(calls_self_sum_OpenCL[callID]),
413                     formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
414                 ))
415             call_total_times = calls[callID].totalTimes
416             print(fmt2.format("", "", "", "",
417                     formatTimestamp(min(call_total_times)),
418                     formatTimestamp(max(call_total_times)),
419                     formatTimestamp(median(call_total_times)),
420                     formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
421                     formatTimestamp(sum(call_total_times)),
422                     formatTimestamp(calls_total_sum_IPP[callID]),
423                     formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
424                     formatTimestamp(calls_total_sum_OpenCL[callID]),
425                     formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
426                 ))
427             print()
428
429 if __name__ == "__main__":
430     tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
431     count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
432     trace = Trace(tracefile)
433     trace.process()
434     trace.dump(max_entries = count)
435     print("OK")