1 """
2  Copyright (C) 2018-2019 Intel Corporation
3
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 """
16
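# Python benchmark_app: measures inference latency and throughput of a network
# (IR .xml/.bin pair) on a given device via the Inference Engine Python API.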
import os
import logging
from datetime import datetime
from statistics import median

from openvino.inference_engine import IENetwork, IECore, get_version

from .utils.parameters import *
from .utils.inputs_filling import *
from .utils.utils import *
from .utils.infer_request_wrap import *
from .utils.progress_bar import *

def getDurationInMilliseconds(duration):
    return duration * 1000

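# Decorator that emulates function-local static variables by attaching the given
# keyword arguments as attributes of the decorated function object.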
def static_vars(**kwargs):
    def decorate(func):
        for k in kwargs:
            setattr(func, k, kwargs[k])
        return func
    return decorate

@static_vars(step_id=0)
def next_step(additional_info=""):
    step_names = {
        1  : "Parsing and validating input arguments",
        2  : "Loading Inference Engine",
        3  : "Reading the Intermediate Representation of the network",
        4  : "Resizing network to match image sizes and given batch",
        5  : "Configuring input of the model",
        6  : "Setting device configuration",
        7  : "Loading the model to the device",
        8  : "Setting optimal runtime parameters",
        9  : "Creating infer requests and filling input blobs with images",
        10 : "Measuring performance",
        11 : "Dumping statistics report",
    }

    next_step.step_id += 1
    if next_step.step_id not in step_names:
        raise Exception("Step ID {} is out of total steps number {}".format(next_step.step_id, len(step_names)))

    print("[Step {}/{}] {}{}".format(next_step.step_id, len(step_names), step_names[next_step.step_id],
                                     " ({})".format(additional_info) if additional_info else ""))

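# Benchmark entry point: executes the numbered steps announced by next_step() in order.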
def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version", version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Reading the Intermediate Representation of the network ------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

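        # When a batch size is given on the command line and differs from the IR's batch,
        # patch the batch dimension of every input shape and reshape the network.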
        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                # The batch dimension comes first in N*-layouts and second in CN
                batch_index = -1
                if layout in ('NCHW', 'NCDHW', 'NHWC', 'NDHWC', 'NC'):
                    batch_index = 0
                elif layout == 'CN':
                    batch_index = 1

                if batch_index != -1 and shape[batch_index] != args.batch_size:
                    shape[batch_index] = args.batch_size
                    new_shapes[key] = shape

            if new_shapes:
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision: {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if isImage(input_info[key]):
                # Set the precision of input data provided by the user.
                # Should be called before loading the network to the plugin.
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

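        # Per-device tuning: thread count and pinning for the CPU, plus the number of
        # throughput streams (parallel execution queues) for CPU/GPU in async mode.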
        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # Limit threading for the CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                # Pin threads for the CPU portion of inference
                ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                # For pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams[device])
                                   if device in device_nstreams
                                   else 'CPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))

            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams[device])
                                   if device in device_nstreams
                                   else 'GPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

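        # When args.number_infer_requests is not set, num_requests=0 is passed, letting
        # the plugin choose the request count; the actual number is read back below.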
        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        # Iteration limit
        niter = args.number_iterations
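        # In async mode, round the iteration count up to a multiple of the request count
        # so that every infer request executes the same number of times.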
        if niter and args.api_type == 'async':
            niter = ((niter + nireq - 1) // nireq) * nireq
            if args.number_iterations != niter:
                logger.warning("Number of iterations was aligned by request number "
                               "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        # Time limit
        duration_seconds = 0
        if args.time:
            # Explicit time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            # Default time limit when neither a time limit nor an iteration count is given
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

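        # With a pure time limit the total iteration count is unknown in advance, so the
        # progress bar is driven by a fixed number of intervals instead of iterations.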
        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}hronously".format(args.api_type)
        if args.api_type == "async":
            output_string += ", " + str(nireq) + " inference requests"
            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "
        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"

        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        # Warm-up run - out of scope of the measurements: the first inference pays
        # one-off initialization costs, so it is executed before the timers start
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if args.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

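        # Each pass takes an idle request from the queue, submits it, and updates the
        # iteration and elapsed-time counters that drive the loop conditions below.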
        # Start inference & calculate performance.
        # The iteration count is aligned in async mode so that the last infer requests
        # are executed under the same conditions as all the others.
        while ((niter and iteration < niter) or
               (duration_seconds and exec_time < duration_seconds) or
               (args.api_type == "async" and iteration % nireq != 0)):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if args.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                # Calculate how many progress intervals are covered by the current iteration,
                # based on the elapsed time and the duration of one progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        # Wait for the latest inference executions to finish
        request_queue.waitAll()

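        # Latency is the median of the per-request execution times; throughput is derived
        # from latency in sync mode, and from total wall time and iterations in async mode.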
        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            max_layer_name = 30
            for ni in range(nireq):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(layer[:max_layer_name - 4] + '...' if len(layer) >= max_layer_name else layer,
                                                                        stats['status'],
                                                                        'layerType: ' + str(stats['layer_type']),
                                                                        'realTime: ' + str(stats['real_time']),
                                                                        'cpu: ' + str(stats['cpu_time']),
                                                                        'execType: ' + str(stats['exec_type'])))

        print("Count:      {} iterations".format(iteration))
        print("Duration:   {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        print("Latency:    {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)