1 from __future__ import print_function
6 from pprint import pprint
7 from collections import deque
15 REGION_FLAG_IMPL_MASK = 15 << 16
16 REGION_FLAG_IMPL_IPP = 1 << 16
17 REGION_FLAG_IMPL_OPENCL = 2 << 16
25 def dprint(args, **kwargs):
27 def dpprint(args, **kwargs):
31 if s.startswith('0x'):
40 if sys.version_info[0] < 3:
47 def formatTimestamp(t):
48 return "%.3f" % (t * 1e-6)
51 from statistics import median
54 sortedLst = sorted(lst)
56 index = (lstLen - 1) // 2
58 return sortedLst[index]
60 return (sortedLst[index] + sortedLst[index + 1]) * 0.5
62 def getCXXFunctionName(spec):
69 elif spec[pos] == '(':
72 if pos == 0 or spec[pos - 1] in ['#', ':']:
73 res = dropParams(spec[pos+1:-1])
74 return (spec[:pos] + res[0], res[1])
75 return (spec[:pos], spec[pos:])
79 def extractName(spec):
86 elif spec[pos].isalnum():
91 if spec.startswith('IPP') or spec.startswith('OpenCL'):
92 prefix_size = len('IPP') if spec.startswith('IPP') else len('OpenCL')
93 prefix = spec[:prefix_size]
94 if prefix_size < len(spec) and spec[prefix_size] in ['#', ':']:
95 prefix = prefix + spec[prefix_size]
96 prefix_size = prefix_size + 1
98 while begin < len(spec):
99 if spec[begin].isalnum() or spec[begin] in ['_', ':']:
102 if begin == len(spec):
105 while end < len(spec):
106 if not (spec[end].isalnum() or spec[end] in ['_', ':']):
109 return prefix + spec[begin:end]
111 spec = spec.replace(') const', ')') # const methods
112 (ret_type_name, params) = dropParams(spec)
113 name = extractName(ret_type_name)
114 if 'operator' in name:
116 if name.startswith('&'):
123 def __init__(self, filename=None):
127 self.threads_stack = {}
128 self.pending_files = deque()
133 def __init__(self, threadID, taskID, locationID, beginTimestamp):
134 self.threadID = threadID
136 self.locationID = locationID
137 self.beginTimestamp = beginTimestamp
138 self.endTimestamp = None
139 self.parentTaskID = None
140 self.parentThreadID = None
143 self.selfTimeOpenCL = 0
144 self.totalTimeIPP = 0
145 self.totalTimeOpenCL = 0
148 return "TID={} ID={} loc={} parent={}:{} begin={} end={} IPP={}/{} OpenCL={}/{}".format(
149 self.threadID, self.taskID, self.locationID, self.parentThreadID, self.parentTaskID,
150 self.beginTimestamp, self.endTimestamp, self.totalTimeIPP, self.selfTimeIPP, self.totalTimeOpenCL, self.selfTimeOpenCL)
154 def __init__(self, locationID, filename, line, name, flags):
155 self.locationID = locationID
156 self.filename = os.path.split(filename)[1]
158 self.name = getCXXFunctionName(name)
162 return "{}#{}:{}".format(self.name, self.filename, self.line)
165 return "ID={} {}:{}:{}".format(self.locationID, self.filename, self.line, self.name)
167 def parse_file(self, filename):
168 dprint("Process file: '{}'".format(filename))
169 with open(filename) as infile:
171 line = str(line).strip()
173 if line.startswith("#thread file:"):
174 name = str(line.split(':', 1)[1]).strip()
175 self.pending_files.append(os.path.join(os.path.split(filename)[0], name))
177 self.parse_line(line)
179 def parse_line(self, line):
180 opts = line.split(',')
183 opts = list(csv.reader([line]))[0] # process quote more
184 locationID = int(opts[1])
185 filename = str(opts[2])
188 flags = tryNum(opts[5])
189 self.locations[locationID] = self.TraceLocation(locationID, filename, line, name, flags)
195 (k, v) = e.split('=')
196 extra_opts[k] = tryNum(v)
203 if opts[0] in ['b', 'e']:
204 threadID = int(opts[1])
205 taskID = int(opts[4])
206 locationID = int(opts[3])
209 currentTask = (None, None)
210 if threadID is not None:
211 if not threadID in self.threads_stack:
212 thread_stack = deque()
213 self.threads_stack[threadID] = thread_stack
215 thread_stack = self.threads_stack[threadID]
216 currentTask = None if not thread_stack else thread_stack[-1]
217 t = (threadID, taskID)
219 assert not t in self.tasks, "Duplicate task: " + str(t) + repr(self.tasks[t])
220 task = self.TraceTask(threadID, taskID, locationID, ts)
222 self.tasks_list.append(task)
223 thread_stack.append((threadID, taskID))
225 task.parentThreadID = currentTask[0]
226 task.parentTaskID = currentTask[1]
227 if 'parentThread' in extra_opts:
228 task.parentThreadID = extra_opts['parentThread']
229 if 'parent' in extra_opts:
230 task.parentTaskID = extra_opts['parent']
233 task.endTimestamp = ts
234 if 'tIPP' in extra_opts:
235 task.selfTimeIPP = extra_opts['tIPP']
236 if 'tOCL' in extra_opts:
237 task.selfTimeOpenCL = extra_opts['tOCL']
240 def load(self, filename):
241 self.pending_files.append(filename)
243 with open(filename, 'r') as f:
244 print(f.read(), end='')
245 while self.pending_files:
246 self.parse_file(self.pending_files.pop())
248 def getParentTask(self, task):
249 return self.tasks.get((task.parentThreadID, task.parentTaskID), None)
252 self.tasks_list.sort(key=lambda x: x.beginTimestamp)
254 parallel_for_location = None
255 for (id, l) in self.locations.items():
256 if l.name == 'parallel_for':
257 parallel_for_location = l.locationID
260 for task in self.tasks_list:
262 task.duration = task.endTimestamp - task.beginTimestamp
263 task.selfDuration = task.duration
266 task.selfDuration = None
267 task.totalTimeIPP = task.selfTimeIPP
268 task.totalTimeOpenCL = task.selfTimeOpenCL
271 dprint("Calculate total times")
273 for task in self.tasks_list:
274 parentTask = self.getParentTask(task)
276 parentTask.selfDuration = parentTask.selfDuration - task.duration
277 parentTask.childTask.append(task)
278 timeIPP = task.selfTimeIPP
279 timeOpenCL = task.selfTimeOpenCL
281 if parentTask.locationID == parallel_for_location: # TODO parallel_for
283 parentLocation = self.locations[parentTask.locationID]
284 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
285 parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
288 parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
289 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
290 parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
293 parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
294 parentTask = self.getParentTask(parentTask)
297 dprint("Calculate total times (parallel_for)")
299 for task in self.tasks_list:
300 if task.locationID == parallel_for_location:
301 task.selfDuration = 0
302 childDuration = sum([t.duration for t in task.childTask])
303 if task.duration == 0 or childDuration == 0:
305 timeCoef = task.duration / float(childDuration)
306 childTimeIPP = sum([t.totalTimeIPP for t in task.childTask])
307 childTimeOpenCL = sum([t.totalTimeOpenCL for t in task.childTask])
308 if childTimeIPP == 0 and childTimeOpenCL == 0:
310 timeIPP = childTimeIPP * timeCoef
311 timeOpenCL = childTimeOpenCL * timeCoef
314 parentLocation = self.locations[parentTask.locationID]
315 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_IPP:
316 parentTask.selfTimeIPP = parentTask.selfTimeIPP - timeIPP
319 parentTask.totalTimeIPP = parentTask.totalTimeIPP + timeIPP
320 if (parentLocation.flags & REGION_FLAG_IMPL_MASK) == REGION_FLAG_IMPL_OPENCL:
321 parentTask.selfTimeOpenCL = parentTask.selfTimeOpenCL - timeOpenCL
324 parentTask.totalTimeOpenCL = parentTask.totalTimeOpenCL + timeOpenCL
325 parentTask = self.getParentTask(parentTask)
330 def dump(self, max_entries):
331 assert isinstance(max_entries, int)
334 def __init__(self, callID):
339 self.selfTimesIPP = []
340 self.selfTimesOpenCL = []
341 self.totalTimesIPP = []
342 self.totalTimesOpenCL = []
346 for currentTask in self.tasks_list:
349 for i in range(stack_size):
350 callID.append(task.locationID)
351 task = self.getParentTask(task)
354 callID = tuple(callID)
355 if not callID in calls:
356 call = CallInfo(callID)
360 call.totalTimes.append(currentTask.duration)
361 call.selfTimes.append(currentTask.selfDuration)
362 call.threads.add(currentTask.threadID)
363 call.selfTimesIPP.append(currentTask.selfTimeIPP)
364 call.selfTimesOpenCL.append(currentTask.selfTimeOpenCL)
365 call.totalTimesIPP.append(currentTask.totalTimeIPP)
366 call.totalTimesOpenCL.append(currentTask.totalTimeOpenCL)
369 dpprint(self.locations)
372 calls_self_sum = {k: sum(v.selfTimes) for (k, v) in calls.items()}
373 calls_total_sum = {k: sum(v.totalTimes) for (k, v) in calls.items()}
374 calls_median = {k: median(v.selfTimes) for (k, v) in calls.items()}
375 calls_sorted = sorted(calls.keys(), key=lambda x: calls_self_sum[x], reverse=True)
377 calls_self_sum_IPP = {k: sum(v.selfTimesIPP) for (k, v) in calls.items()}
378 calls_total_sum_IPP = {k: sum(v.totalTimesIPP) for (k, v) in calls.items()}
380 calls_self_sum_OpenCL = {k: sum(v.selfTimesOpenCL) for (k, v) in calls.items()}
381 calls_total_sum_OpenCL = {k: sum(v.totalTimesOpenCL) for (k, v) in calls.items()}
383 if max_entries > 0 and len(calls_sorted) > max_entries:
384 calls_sorted = calls_sorted[:max_entries]
386 def formatPercents(p):
388 return "{:>3d}".format(int(p*100))
394 return '{:>' + str(timestamp_width) + '}'
395 fmt = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
396 fmt2 = "{:>3} {:<"+str(name_width)+"} {:>8} {:>3}"+((' '+fmtTS())*5)+((' '+fmtTS()+' {:>3}')*2)
397 print(fmt.format("ID", "name", "count", "thr", "min", "max", "median", "avg", "*self*", "IPP", "%", "OpenCL", "%"))
398 print(fmt2.format("", "", "", "", "t-min", "t-max", "t-median", "t-avg", "total", "t-IPP", "%", "t-OpenCL", "%"))
399 for (index, callID) in enumerate(calls_sorted):
400 call_self_times = calls[callID].selfTimes
401 loc0 = self.locations[callID[0]]
402 loc_array = [] # [str(callID)]
403 for (i, l) in enumerate(callID):
404 loc = self.locations[l]
405 loc_array.append(loc.name if i > 0 else str(loc))
406 loc_str = '|'.join(loc_array)
407 if len(loc_str) > name_width: loc_str = loc_str[:name_width-3]+'...'
408 print(fmt.format(index + 1, loc_str, len(call_self_times),
409 len(calls[callID].threads),
410 formatTimestamp(min(call_self_times)),
411 formatTimestamp(max(call_self_times)),
412 formatTimestamp(calls_median[callID]),
413 formatTimestamp(sum(call_self_times)/float(len(call_self_times))),
414 formatTimestamp(sum(call_self_times)),
415 formatTimestamp(calls_self_sum_IPP[callID]),
416 formatPercents(calls_self_sum_IPP[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
417 formatTimestamp(calls_self_sum_OpenCL[callID]),
418 formatPercents(calls_self_sum_OpenCL[callID] / float(calls_self_sum[callID])) if calls_self_sum[callID] > 0 else formatPercents(None),
420 call_total_times = calls[callID].totalTimes
421 print(fmt2.format("", "", "", "",
422 formatTimestamp(min(call_total_times)),
423 formatTimestamp(max(call_total_times)),
424 formatTimestamp(median(call_total_times)),
425 formatTimestamp(sum(call_total_times)/float(len(call_total_times))),
426 formatTimestamp(sum(call_total_times)),
427 formatTimestamp(calls_total_sum_IPP[callID]),
428 formatPercents(calls_total_sum_IPP[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
429 formatTimestamp(calls_total_sum_OpenCL[callID]),
430 formatPercents(calls_total_sum_OpenCL[callID] / float(calls_total_sum[callID])) if calls_total_sum[callID] > 0 else formatPercents(None),
434 if __name__ == "__main__":
435 tracefile = sys.argv[1] if len(sys.argv) > 1 else 'OpenCVTrace.txt'
436 count = int(sys.argv[2]) if len(sys.argv) > 2 else 10
437 trace = Trace(tracefile)
439 trace.dump(max_entries = count)