1 from __future__ import print_function
6 from pprint import pprint
7 from collections import deque
10 REGION_FLAG_IMPL_MASK = 15 << 16;
11 REGION_FLAG_IMPL_IPP = 1 << 16;
12 REGION_FLAG_IMPL_OPENCL = 2 << 16;
20 def dprint(args, **kwargs):
22 def dpprint(args, **kwargs):
26 if s.startswith('0x'):
35 if sys.version_info[0] < 3:
42 def formatTimestamp(t):
43 return "%.3f" % (t * 1e-6)
46 from statistics import median
49 sortedLst = sorted(lst)
51 index = (lstLen - 1) // 2
53 return sortedLst[index]
55 return (sortedLst[index] + sortedLst[index + 1]) * 0.5
57 def getCXXFunctionName(spec):
64 elif spec[pos] == '(':
67 if pos == 0 or spec[pos - 1] in ['#', ':']:
68 res = dropParams(spec[pos+1:-1])
69 return (spec[:pos] + res[0], res[1])
70 return (spec[:pos], spec[pos:])
74 def extractName(spec):
81 elif spec[pos].isalnum():
86 if spec.startswith('IPP') or spec.startswith('OpenCL'):
87 prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
88 prefix = spec[:prefix_size]
89 if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
90 prefix = prefix + spec[prefix_size]
91 prefix_size = prefix_size + 1
93 while begin < len(spec):
94 if spec[begin].isalnum() or spec[begin] in ['_', ':']:
97 if begin == len(spec):
100 while end < len(spec):
101 if not (spec[end].isalnum() or spec[end] in ['_', ':']):
104 return prefix + spec[begin:end]
106 spec = spec.replace(') const', ')') # const methods
107 (ret_type_name, params) = dropParams(spec)
108 name = extractName(ret_type_name)
109 if 'operator' in name:
111 if name.startswith('&'):
118 def __init__(self, filename=None):
122 self.threads_stack = {}
123 self.pending_files = deque()
128 def __init__(self, threadID, taskID, locationID, beginTimestamp):
129 self.threadID = threadID
131 self.locationID = locationID
132 self.beginTimestamp = beginTimestamp
133 self.endTimestamp = None
134 self.parentTaskID = None
135 self.parentThreadID = None
138 self.selfTimeOpenCL = 0
139 self.totalTimeIPP = 0
140 self.totalTimeOpenCL = 0
143 return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
144 self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
145 self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
149 def __init__(self, locationID, filename, line, name, flags):
150 self.locationID = locationID
151 self.filename = os.path.split(filename)[1]
153 self.name = getCXXFunctionName(name)
157 return "{}#{}:{}".format(self.name, self.filename, self.line)
160 return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
162 def parse_file(self, filename):
163 dprint("Process file: '{}'".format(filename))
164 with open(filename) as infile:
166 line = str(line).strip()
168 if line.startswith("#thread file:"):
169 name = str(line.split(':', 1)[1]).strip()
170 self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
172 self.parse_line(line)
174 def parse_line(self, line):
175 opts = line.split(',')
178 opts = list(csv.reader([line]))[0] # process quote more
179 locationID = int(opts[1])
180 filename = str(opts[2])
183 flags = tryNum(opts[5])
184 self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
190 (k, v) = e.split('=')
191 extra_opts[k] = tryNum(v)
198 if opts[0] in ['b', 'e']:
199 threadID = int(opts[1])
200 taskID = int(opts[4])
201 locationID = int(opts[3])
204 currentTask = (None, None)
205 if threadID is not None:
206 if not threadID in self.threads_stack:
207 thread_stack = deque()
208 self.threads_stack[threadID] = thread_stack
210 thread_stack = self.threads_stack[threadID]
211 currentTask = None if not thread_stack else thread_stack[-1]
212 t = (threadID, taskID)
214 assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
215 task = self.TraceTask(threadID, taskID, locationID, ts)
217 self.tasks_list.append(task)
218 thread_stack.append((threadID, taskID))
220 task.parentThreadID = currentTask[0]
221 task.parentTaskID = currentTask[1]
222 if 'parentThread' in extra_opts:
223 task.parentThreadID = extra_opts['parentThread']
224 if 'parent' in extra_opts:
225 task.parentTaskID = extra_opts['parent']
228 task.endTimestamp = ts
229 if 'tIPP' in extra_opts:
230 task.selfTimeIPP = extra_opts['tIPP']
231 if 'tOCL' in extra_opts:
232 task.selfTimeOpenCL = extra_opts['tOCL']
235 def load(self, filename):
236 self.pending_files.append(filename)
238 with open(filename, 'r') as f:
239 print(f.read(), end='')
240 while self.pending_files:
241 self.parse_file(self.pending_files.pop())
243 def getParentTask(self, task):
244 return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
247 self.tasks_list.sort(key=lambda x: x.beginTimestamp)
249 parallel_for_location = None
250 for (id, l) in self.locations.items():
251 if l.name == 'parallel_for':
252 parallel_for_location = l.locationID
255 for task in self.tasks_list:
257 task.duration = task.endTimestamp - task.beginTimestamp
258 task.selfDuration = task.duration
261 task.selfDuration = None
262 task.totalTimeIPP = task.selfTimeIPP
263 task.totalTimeOpenCL = task.selfTimeOpenCL
266 dprint("Calculate total times")
268 for task in self.tasks_list:
269 parentTask = self.getParentTask(task)
271 parentTask.selfDuration = parentTask.selfDuration - task.duration
272 parentTask.childTask.append(task)
273 timeIPP = task.selfTimeIPP
274 timeOpenCL = task.selfTimeOpenCL
276 if parentTask.locationID == parallel_for_location: # TODO parallel_for
278 parentLocation = self.locations[parentTask.locationID]
279 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
280 parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
283 parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
284 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
285 parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
288 parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
289 parentTask = self.getParentTask(parentTask)
292 dprint("Calculate total times (parallel_for)")
294 for task in self.tasks_list:
295 if task.locationID == parallel_for_location:
296 task.selfDuration = 0
297 childDuration = sum([t.duration for t in task.childTask])
298 if task.duration == 0 or childDuration == 0:
300 timeCoef = task.duration / float(childDuration)
301 childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
302 childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
303 if childTimeIPP == 0 and childTimeOpenCL == 0:
305 timeIPP = childTimeIPP * timeCoef
306 timeOpenCL = childTimeOpenCL * timeCoef
309 parentLocation = self.locations[parentTask.locationID]
310 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
311 parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
314 parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
315 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
316 parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
319 parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
320 parentTask = self.getParentTask(parentTask)
325 def dump(self, max_entries):
326 assert isinstance(max_entries, int)
329 def __init__(self, callID):
334 self.selfTimesIPP = []
335 self.selfTimesOpenCL = []
336 self.totalTimesIPP = []
337 self.totalTimesOpenCL = []
341 for currentTask in self.tasks_list:
344 for i in range(stack_size):
345 callID.append(task.locationID)
346 task = self.getParentTask(task)
349 callID = tuple(callID)
350 if not callID in calls:
351 call = CallInfo(callID)
355 call.totalTimes.append(currentTask.duration)
356 call.selfTimes.append(currentTask.selfDuration)
357 call.threads.add(currentTask.threadID)
358 call.selfTimesIPP.append(currentTask.selfTimeIPP)
359 call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
360 call.totalTimesIPP.append(currentTask.totalTimeIPP)
361 call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
364 dpprint(self.locations)
367 calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
368 calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
369 calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
370 calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
372 calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
373 calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
375 calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
376 calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
378 if max_entries > 0 and len(calls_sorted) > max_entries:
379 calls_sorted = calls_sorted[:max_entries]
381 def formatPercents(p):
383 return "{:>3d}".format(int(p*100))
389 return '{:>' + str(timestamp_width) + '}'
390 fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
391 fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
392 print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
393 print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
394 for (index, callID) in enumerate(calls_sorted):
395 call_self_times = calls[callID].selfTimes
396 loc0 = self.locations[callID[0]]
397 loc_array = [] # [str(callID)]
398 for (i, l) in enumerate(callID):
399 loc = self.locations[l]
400 loc_array.append(loc.name if i > 0 else str(loc))
401 loc_str = '|'.join(loc_array)
402 if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
403 print(fmt.format(index + 1, loc_str, len(call_self_times),
404 len(calls[callID].threads),
405 formatTimestamp(min(call_self_times)),
406 formatTimestamp(max(call_self_times)),
407 formatTimestamp(calls_median[callID]),
408 formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
409 formatTimestamp(sum(call_self_times)),
410 formatTimestamp(calls_self_sum_IPP[callID]),
411 formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
412 formatTimestamp(calls_self_sum_OpenCL[callID]),
413 formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
415 call_total_times = calls[callID].totalTimes
416 print(fmt2.format("", "", "", "",
417 formatTimestamp(min(call_total_times)),
418 formatTimestamp(max(call_total_times)),
419 formatTimestamp(median(call_total_times)),
420 formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
421 formatTimestamp(sum(call_total_times)),
422 formatTimestamp(calls_total_sum_IPP[callID]),
423 formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
424 formatTimestamp(calls_total_sum_OpenCL[callID]),
425 formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
429 if __name__ == "__main__":
430 tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
431 count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
432 trace = Trace(tracefile)
434 trace.dump(max_entries = count)