"""
Copyright (C) 2018-2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from statistics import median

from openvino.inference_engine import IENetwork, IECore, get_version

from .utils.parameters import *
from .utils.inputs_filling import *
from .utils.utils import *
from .utils.infer_request_wrap import *
from .utils.progress_bar import *
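
## Note: besides the helpers used below (parseDevices, getInputs, isImage,
## get_duration_in_secs, InferRequestsQueue, ProgressBar, ...), the star imports
## are also expected to provide os, datetime, logging and the module-level logger.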

def getDurationInMilliseconds(duration):
    return duration * 1000

def static_vars(**kwargs):
    def decorate(func):
        for k in kwargs:
            setattr(func, k, kwargs[k])
        return func
    return decorate
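
## static_vars emulates function-level static variables: each keyword argument is
## attached as an attribute to the decorated function object, so its value
## persists across calls (used below to track the current step number).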

@static_vars(step_id=0)
def next_step(additional_info=""):
    step_names = {
        1: "Parsing and validating input arguments",
        2: "Loading Inference Engine",
        3: "Read the Intermediate Representation of the network",
        4: "Resizing network to match image sizes and given batch",
        5: "Configuring input of the model",
        6: "Setting device configuration",
        7: "Loading the model to the device",
        8: "Setting optimal runtime parameters",
        9: "Creating infer requests and filling input blobs with images",
        10: "Measuring performance",
        11: "Dumping statistics report",
    }

    next_step.step_id += 1
    if next_step.step_id not in step_names.keys():
        raise Exception("Step ID " + str(next_step.step_id) +
                        " is out of total steps number " + str(len(step_names)))

    print("[Step {}/{}] {}".format(next_step.step_id, len(step_names), step_names[next_step.step_id]) +
          (" (" + additional_info + ")" if len(additional_info) else ""))
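
## Example output: "[Step 1/11] Parsing and validating input arguments".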

def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()
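
        ## Custom layer support differs per plugin: the CPU plugin loads a compiled
        ## extension library, while the GPU (clDNN) plugin reads an XML configuration
        ## file describing custom kernels.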
        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version", version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)
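
        ## An IR model is a pair of files: the .xml topology and the .bin weights;
        ## the weights path is derived from the .xml path given on the command line.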

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                batchIndex = -1
                if ((layout == 'NCHW') or (layout == 'NCDHW') or
                    (layout == 'NHWC') or (layout == 'NDHWC') or
                    (layout == 'NC')):
                    batchIndex = 0
                elif layout == 'CN':
                    batchIndex = 1

                if (batchIndex != -1) and (shape[batchIndex] != args.batch_size):
                    shape[batchIndex] = args.batch_size
                    new_shapes[key] = shape

            if len(new_shapes) > 0:
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size
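
        ## Example: an input with layout 'NCHW' and shape [1, 3, 224, 224] becomes
        ## [8, 3, 224, 224] for args.batch_size = 8; layouts without a batch
        ## dimension are left untouched.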

        logger.info("Network batch size: {}, precision: {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if isImage(input_info[key]):
                # Set the precision of input data provided by the user
                # Should be called before load of the network to the plugin
                input_info[key].precision = 'U8'
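
        ## U8 matches the raw image bytes read from disk, so inputs can be fed to the
        ## plugin without an extra conversion on the application side.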

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  ## CPU supports a few special performance-oriented keys
                ## limit threading for CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                ## pin threads for CPU portion of inference
                ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                ## for pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                                             if device in device_nstreams.keys()
                                                             else 'CPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))

            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                                             if device in device_nstreams.keys()
                                                             else 'GPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)
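
        ## Throughput streams let several infer requests execute on a device truly in
        ## parallel; the *_THROUGHPUT_AUTO value defers the choice of stream count to
        ## the plugin, and the effective value is read back into device_nstreams.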

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)
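
        ## When args.number_infer_requests is not set, num_requests=0 leaves the
        ## choice of request count to the plugin; the effective number is queried
        ## back in the next step.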

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        ## Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        ## Number of iterations
        niter = args.number_iterations
        if niter and args.api_type == 'async':
            niter = ((niter + nireq - 1) // nireq) * nireq
            if args.number_iterations != niter:
                logger.warning("Number of iterations was aligned by request number "
                               "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        ## Time limit
        duration_seconds = 0
        if args.time:
            duration_seconds = args.time
        elif not args.number_iterations:
            ## default time limit
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}hronously".format(args.api_type)
        if args.api_type == "async":
            if output_string != "":
                output_string += ", "

            output_string += str(nireq) + " inference requests"
            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "
        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"
        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        ## warming up - out of scope
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if args.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()
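
        ## The warm-up run above is excluded from the statistics: resetTimes() drops
        ## its timing so one-time initialization costs do not skew measured latency.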

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

        ## Start inference & calculate performance.
        ## The loop conditions also align the number of iterations to guarantee that
        ## the last infer requests are executed in the same conditions: in async mode
        ## the loop keeps running until the iteration count is a multiple of nireq.
        while ((niter and iteration < niter) or
               (duration_seconds and exec_time < duration_seconds) or
               (args.api_type == "async" and iteration % nireq != 0)):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if args.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                ## Calculate how many progress intervals are covered by the current
                ## iteration, based on the iteration time and the time of each
                ## progress interval; previously covered intervals are skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        ## wait for the latest inference executions
        request_queue.waitAll()

        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
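
        ## Latency is the median of per-request execution times. Throughput depends on
        ## the mode: sync runs one request at a time, so FPS = batch_size / latency;
        ## async overlaps requests, so FPS = frames processed / total wall-clock time.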
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            for ni in range(int(nireq)):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    max_layer_name = 30
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                        layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
                        stats['status'],
                        'layerType: ' + str(stats['layer_type']),
                        'realTime: ' + str(stats['real_time']),
                        'cpu: ' + str(stats['cpu_time']),
                        'execType: ' + str(stats['exec_type'])))

        print("Count: {} iterations".format(iteration))
        print("Duration: {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        print("Latency: {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)