1 """
2  Copyright (C) 2018-2019 Intel Corporation
3
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 """
16
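# Python benchmark_app: measures inference latency and throughput of a network
# (IR .xml/.bin pair) on a given device via the Inference Engine Python API.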
import os
import logging
from datetime import datetime
from statistics import median

from openvino.inference_engine import IENetwork, IECore, get_version

from .utils.parameters import *
from .utils.inputs_filling import *
from .utils.utils import *
from .utils.infer_request_wrap import *
from .utils.progress_bar import *

def getDurationInMilliseconds(duration):
    return duration * 1000

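# Decorator that emulates function-local static variables by attaching the given
# keyword arguments as attributes of the decorated function object.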
def static_vars(**kwargs):
    def decorate(func):
        for k in kwargs:
            setattr(func, k, kwargs[k])
        return func
    return decorate

@static_vars(step_id=0)
def next_step(additional_info=""):
    step_names = {
        1  : "Parsing and validating input arguments",
        2  : "Loading Inference Engine",
        3  : "Reading the Intermediate Representation of the network",
        4  : "Resizing network to match image sizes and given batch",
        5  : "Configuring input of the model",
        6  : "Setting device configuration",
        7  : "Loading the model to the device",
        8  : "Setting optimal runtime parameters",
        9  : "Creating infer requests and filling input blobs with images",
        10 : "Measuring performance",
        11 : "Dumping statistics report",
    }

    next_step.step_id += 1
    if next_step.step_id not in step_names:
        raise Exception("Step ID {} is out of total steps number {}".format(next_step.step_id, len(step_names)))

    print("[Step {}/{}] {}{}".format(next_step.step_id, len(step_names), step_names[next_step.step_id],
                                     " ({})".format(additional_info) if additional_info else ""))

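# Benchmark entry point: executes the numbered steps announced by next_step() in order.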
def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version", version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Reading the Intermediate Representation of the network ------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

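        # When a batch size is given on the command line and differs from the IR's batch,
        # patch the batch dimension of every input shape and reshape the network.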
        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                # The batch dimension comes first in N*-layouts and second in CN
                batch_index = -1
                if layout in ('NCHW', 'NCDHW', 'NHWC', 'NDHWC', 'NC'):
                    batch_index = 0
                elif layout == 'CN':
                    batch_index = 1

                if batch_index != -1 and shape[batch_index] != args.batch_size:
                    shape[batch_index] = args.batch_size
                    new_shapes[key] = shape

            if new_shapes:
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision: {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if isImage(input_info[key]):
                # Set the precision of input data provided by the user.
                # Should be called before loading the network to the plugin.
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

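        # Per-device tuning: thread count and pinning for the CPU, plus the number of
        # throughput streams (parallel execution queues) for CPU/GPU in async mode.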
        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # Limit threading for the CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                # Pin threads for the CPU portion of inference
                ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                # For pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams[device])
                                   if device in device_nstreams
                                   else 'CPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))

            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams[device])
                                   if device in device_nstreams
                                   else 'GPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

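        # When args.number_infer_requests is not set, num_requests=0 is passed, letting
        # the plugin choose the request count; the actual number is read back below.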
        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        # Iteration limit
        niter = args.number_iterations
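        # In async mode, round the iteration count up to a multiple of the request count
        # so that every infer request executes the same number of times.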
        if niter and args.api_type == 'async':
            niter = ((niter + nireq - 1) // nireq) * nireq
            if args.number_iterations != niter:
                logger.warning("Number of iterations was aligned by request number "
                               "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        # Time limit
        duration_seconds = 0
        if args.time:
            # Explicit time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            # Default time limit when neither a time limit nor an iteration count is given
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

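        # With a pure time limit the total iteration count is unknown in advance, so the
        # progress bar is driven by a fixed number of intervals instead of iterations.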
        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}hronously".format(args.api_type)
        if args.api_type == "async":
            output_string += ", " + str(nireq) + " inference requests"
            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "
        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"

        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        # Warm-up run - out of scope of the measurements: the first inference pays
        # one-off initialization costs, so it is executed before the timers start
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if args.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

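        # Each pass takes an idle request from the queue, submits it, and updates the
        # iteration and elapsed-time counters that drive the loop conditions below.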
        # Start inference & calculate performance.
        # The iteration count is aligned in async mode so that the last infer requests
        # are executed under the same conditions as all the others.
        while ((niter and iteration < niter) or
               (duration_seconds and exec_time < duration_seconds) or
               (args.api_type == "async" and iteration % nireq != 0)):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if args.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                # Calculate how many progress intervals are covered by the current iteration,
                # based on the elapsed time and the duration of one progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        # Wait for the latest inference executions to finish
        request_queue.waitAll()

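        # Latency is the median of the per-request execution times; throughput is derived
        # from latency in sync mode, and from total wall time and iterations in async mode.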
        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            max_layer_name = 30
            for ni in range(nireq):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(layer[:max_layer_name - 4] + '...' if len(layer) >= max_layer_name else layer,
                                                                        stats['status'],
                                                                        'layerType: ' + str(stats['layer_type']),
                                                                        'realTime: ' + str(stats['real_time']),
                                                                        'cpu: ' + str(stats['cpu_time']),
                                                                        'execType: ' + str(stats['exec_type'])))

        print("Count:      {} iterations".format(iteration))
        print("Duration:   {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        print("Latency:    {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)