Merge pull request #21267 from mshabunin:fix-kw-2021-12
[platform/upstream/opencv.git] / modules / ts / misc / trace_profiler.py
1 from __future__ import print_function
2
3 import os
4 import sys
5 import csv
6 from pprint import pprint
7 from collections import deque
8
9 try:
10     long        # Python 2
11 except NameError:
12     long = int  # Python 3
13
14 # trace.hpp
15 REGION_FLAG_IMPL_MASK = 15 << 16
16 REGION_FLAG_IMPL_IPP = 1 << 16
17 REGION_FLAG_IMPL_OPENCL = 2 << 16
18
19 DEBUG = False
20
21 if DEBUG:
22     dprint = print
23     dpprint = pprint
24 else:
25     def dprint(args, **kwargs):
26         pass
27     def dpprint(args, **kwargs):
28         pass
29
30 def tryNum(s):
31     if s.startswith('0x'):
32         try:
33             return int(s, 16)
34         except ValueError:
35             pass
36     try:
37         return int(s)
38     except ValueError:
39         pass
40     if sys.version_info[0] < 3:
41         try:
42             return long(s)
43         except ValueError:
44             pass
45     return s
46
47 def formatTimestamp(t):
48     return "%.3f" % (t * 1e-6)
49
50 try:
51     from statistics import median
52 except ImportError:
53     def median(lst):
54         sortedLst = sorted(lst)
55         lstLen = len(lst)
56         index = (lstLen - 1) // 2
57         if (lstLen % 2):
58             return sortedLst[index]
59         else:
60             return (sortedLst[index] + sortedLst[index + 1]) * 0.5
61
62 def getCXXFunctionName(spec):
63     def dropParams(spec):
64         pos = len(spec) - 1
65         depth = 0
66         while pos >= 0:
67             if spec[pos] == ')':
68                 depth = depth + 1
69             elif spec[pos] == '(':
70                 depth = depth - 1
71                 if depth == 0:
72                     if pos == 0 or spec[pos - 1] in ['#', ':']:
73                         res = dropParams(spec[pos+1:-1])
74                         return (spec[:pos] + res[0], res[1])
75                     return (spec[:pos], spec[pos:])
76             pos = pos - 1
77         return (spec, '')
78
79     def extractName(spec):
80         pos = len(spec) - 1
81         inName = False
82         while pos >= 0:
83             if spec[pos] == ' ':
84                 if inName:
85                     return spec[pos+1:]
86             elif spec[pos].isalnum():
87                 inName = True
88             pos = pos - 1
89         return spec
90
91     if spec.startswith('IPP') or spec.startswith('OpenCL'):
92         prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
93         prefix = spec[:prefix_size]
94         if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
95             prefix = prefix + spec[prefix_size]
96             prefix_size = prefix_size + 1
97         begin = prefix_size
98         while begin < len(spec):
99             if spec[begin].isalnum() or spec[begin] in ['_', ':']:
100                 break
101             begin = begin + 1
102         if begin == len(spec):
103             return spec
104         end = begin
105         while end < len(spec):
106             if not (spec[end].isalnum() or spec[end] in ['_', ':']):
107                 break
108             end = end + 1
109         return prefix + spec[begin:end]
110
111     spec = spec.replace(') const', ')') # const methods
112     (ret_type_name, params) = dropParams(spec)
113     name = extractName(ret_type_name)
114     if 'operator' in name:
115         return name + params
116     if name.startswith('&'):
117         return name[1:]
118     return name
119
120 stack_size = 10
121
122 class Trace:
123     def __init__(self, filename=None):
124         self.tasks = {}
125         self.tasks_list = []
126         self.locations = {}
127         self.threads_stack = {}
128         self.pending_files = deque()
129         if filename:
130             self.load(filename)
131
132     class TraceTask:
133         def __init__(self, threadID, taskID, locationID, beginTimestamp):
134             self.threadID = threadID
135             self.taskID = taskID
136             self.locationID = locationID
137             self.beginTimestamp = beginTimestamp
138             self.endTimestamp = None
139             self.parentTaskID = None
140             self.parentThreadID = None
141             self.childTask = []
142             self.selfTimeIPP = 0
143             self.selfTimeOpenCL = 0
144             self.totalTimeIPP = 0
145             self.totalTimeOpenCL = 0
146
147         def __repr__(self):
148             return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
149                 self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
150                 self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
151
152
153     class TraceLocation:
154         def __init__(self, locationID, filename, line, name, flags):
155             self.locationID = locationID
156             self.filename = os.path.split(filename)[1]
157             self.line = line
158             self.name = getCXXFunctionName(name)
159             self.flags = flags
160
161         def __str__(self):
162             return "{}#{}:{}".format(self.name, self.filename, self.line)
163
164         def __repr__(self):
165             return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
166
167     def parse_file(self, filename):
168         dprint("Process file: '{}'".format(filename))
169         with open(filename) as infile:
170             for line in infile:
171                 line = str(line).strip()
172                 if line[0] == "#":
173                     if line.startswith("#thread file:"):
174                         name = str(line.split(':', 1)[1]).strip()
175                         self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
176                     continue
177                 self.parse_line(line)
178
179     def parse_line(self, line):
180         opts = line.split(',')
181         dpprint(opts)
182         if opts[0] == 'l':
183             opts = list(csv.reader([line]))[0]  # process quote more
184             locationID = int(opts[1])
185             filename = str(opts[2])
186             line = int(opts[3])
187             name = opts[4]
188             flags = tryNum(opts[5])
189             self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
190             return
191         extra_opts = {}
192         for e in opts[5:]:
193             if not '=' in e:
194                 continue
195             (k, v) = e.split('=')
196             extra_opts[k] = tryNum(v)
197         if extra_opts:
198             dpprint(extra_opts)
199         threadID = None
200         taskID = None
201         locationID = None
202         ts = None
203         if opts[0] in ['b', 'e']:
204             threadID = int(opts[1])
205             taskID = int(opts[4])
206             locationID = int(opts[3])
207             ts = tryNum(opts[2])
208         thread_stack = None
209         currentTask = (None, None)
210         if threadID is not None:
211             if not threadID in self.threads_stack:
212                 thread_stack = deque()
213                 self.threads_stack[threadID] = thread_stack
214             else:
215                 thread_stack = self.threads_stack[threadID]
216             currentTask = None if not thread_stack else thread_stack[-1]
217         t = (threadID, taskID)
218         if opts[0] == 'b':
219             assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
220             task = self.TraceTask(threadID, taskID, locationID, ts)
221             self.tasks[t] = task
222             self.tasks_list.append(task)
223             thread_stack.append((threadID, taskID))
224             if currentTask:
225                 task.parentThreadID = currentTask[0]
226                 task.parentTaskID = currentTask[1]
227             if 'parentThread' in extra_opts:
228                 task.parentThreadID = extra_opts['parentThread']
229             if 'parent' in extra_opts:
230                 task.parentTaskID = extra_opts['parent']
231         if opts[0] == 'e':
232             task = self.tasks[t]
233             task.endTimestamp = ts
234             if 'tIPP' in extra_opts:
235                 task.selfTimeIPP = extra_opts['tIPP']
236             if 'tOCL' in extra_opts:
237                 task.selfTimeOpenCL = extra_opts['tOCL']
238             thread_stack.pop()
239
240     def load(self, filename):
241         self.pending_files.append(filename)
242         if DEBUG:
243             with open(filename, 'r') as f:
244                 print(f.read(), end='')
245         while self.pending_files:
246             self.parse_file(self.pending_files.pop())
247
248     def getParentTask(self, task):
249         return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
250
251     def process(self):
252         self.tasks_list.sort(key=lambda x: x.beginTimestamp)
253
254         parallel_for_location = None
255         for (id, l) in self.locations.items():
256             if l.name == 'parallel_for':
257                 parallel_for_location = l.locationID
258                 break
259
260         for task in self.tasks_list:
261             try:
262                 task.duration = task.endTimestamp - task.beginTimestamp
263                 task.selfDuration = task.duration
264             except:
265                 task.duration = None
266                 task.selfDuration = None
267             task.totalTimeIPP = task.selfTimeIPP
268             task.totalTimeOpenCL = task.selfTimeOpenCL
269
270         dpprint(self.tasks)
271         dprint("Calculate total times")
272
273         for task in self.tasks_list:
274             parentTask = self.getParentTask(task)
275             if parentTask:
276                 parentTask.selfDuration = parentTask.selfDuration - task.duration
277                 parentTask.childTask.append(task)
278                 timeIPP = task.selfTimeIPP
279                 timeOpenCL = task.selfTimeOpenCL
280                 while parentTask:
281                     if parentTask.locationID == parallel_for_location:  # TODO parallel_for
282                         break
283                     parentLocation = self.locations[parentTask.locationID]
284                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
285                         parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
286                         timeIPP = 0
287                     else:
288                         parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
289                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
290                         parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
291                         timeOpenCL = 0
292                     else:
293                         parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
294                     parentTask = self.getParentTask(parentTask)
295
296         dpprint(self.tasks)
297         dprint("Calculate total times (parallel_for)")
298
299         for task in self.tasks_list:
300             if task.locationID == parallel_for_location:
301                 task.selfDuration = 0
302                 childDuration = sum([t.duration for t in task.childTask])
303                 if task.duration == 0 or childDuration == 0:
304                     continue
305                 timeCoef = task.duration / float(childDuration)
306                 childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
307                 childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
308                 if childTimeIPP == 0 and childTimeOpenCL == 0:
309                     continue
310                 timeIPP = childTimeIPP * timeCoef
311                 timeOpenCL = childTimeOpenCL * timeCoef
312                 parentTask = task
313                 while parentTask:
314                     parentLocation = self.locations[parentTask.locationID]
315                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
316                         parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
317                         timeIPP = 0
318                     else:
319                         parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
320                     if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
321                         parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
322                         timeOpenCL = 0
323                     else:
324                         parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
325                     parentTask = self.getParentTask(parentTask)
326
327         dpprint(self.tasks)
328         dprint("Done")
329
330     def dump(self, max_entries):
331         assert isinstance(max_entries, int)
332
333         class CallInfo():
334             def __init__(self, callID):
335                 self.callID = callID
336                 self.totalTimes = []
337                 self.selfTimes = []
338                 self.threads = set()
339                 self.selfTimesIPP = []
340                 self.selfTimesOpenCL = []
341                 self.totalTimesIPP = []
342                 self.totalTimesOpenCL = []
343
344         calls = {}
345
346         for currentTask in self.tasks_list:
347             task = currentTask
348             callID = []
349             for i in range(stack_size):
350                 callID.append(task.locationID)
351                 task = self.getParentTask(task)
352                 if not task:
353                     break
354             callID = tuple(callID)
355             if not callID in calls:
356                 call = CallInfo(callID)
357                 calls[callID] = call
358             else:
359                 call = calls[callID]
360             call.totalTimes.append(currentTask.duration)
361             call.selfTimes.append(currentTask.selfDuration)
362             call.threads.add(currentTask.threadID)
363             call.selfTimesIPP.append(currentTask.selfTimeIPP)
364             call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
365             call.totalTimesIPP.append(currentTask.totalTimeIPP)
366             call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
367
368         dpprint(self.tasks)
369         dpprint(self.locations)
370         dpprint(calls)
371
372         calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
373         calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
374         calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
375         calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
376
377         calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
378         calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
379
380         calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
381         calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
382
383         if max_entries > 0 and len(calls_sorted) > max_entries:
384             calls_sorted = calls_sorted[:max_entries]
385
386         def formatPercents(p):
387             if p is not None:
388                 return "{:>3d}".format(int(p*100))
389             return ''
390
391         name_width = 70
392         timestamp_width = 12
393         def fmtTS():
394             return '{:>' + str(timestamp_width) + '}'
395         fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
396         fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
397         print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
398         print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
399         for (index, callID) in enumerate(calls_sorted):
400             call_self_times = calls[callID].selfTimes
401             loc0 = self.locations[callID[0]]
402             loc_array = []  # [str(callID)]
403             for (i, l) in enumerate(callID):
404                 loc = self.locations[l]
405                 loc_array.append(loc.name if i > 0 else str(loc))
406             loc_str = '|'.join(loc_array)
407             if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
408             print(fmt.format(index + 1, loc_str, len(call_self_times),
409                     len(calls[callID].threads),
410                     formatTimestamp(min(call_self_times)),
411                     formatTimestamp(max(call_self_times)),
412                     formatTimestamp(calls_median[callID]),
413                     formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
414                     formatTimestamp(sum(call_self_times)),
415                     formatTimestamp(calls_self_sum_IPP[callID]),
416                     formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
417                     formatTimestamp(calls_self_sum_OpenCL[callID]),
418                     formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
419                 ))
420             call_total_times = calls[callID].totalTimes
421             print(fmt2.format("", "", "", "",
422                     formatTimestamp(min(call_total_times)),
423                     formatTimestamp(max(call_total_times)),
424                     formatTimestamp(median(call_total_times)),
425                     formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
426                     formatTimestamp(sum(call_total_times)),
427                     formatTimestamp(calls_total_sum_IPP[callID]),
428                     formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
429                     formatTimestamp(calls_total_sum_OpenCL[callID]),
430                     formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
431                 ))
432             print()
433
434 if __name__ == "__main__":
435     tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
436     count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
437     trace = Trace(tracefile)
438     trace.process()
439     trace.dump(max_entries = count)
440     print("OK")