inference-engine/ie_bridges/python/sample/benchmark_app/benchmark.py
#!/usr/bin/env python
"""
 Copyright (c) 2018 Intel Corporation

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
"""

from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin

from utils.benchmark_utils import *
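# NOTE: the wildcard import above is expected to provide the helpers and names used
# unqualified below: logger/logging, os, datetime, parse_args, validate_args, get_images,
# get_duration_in_secs, fill_blob_with_image, sync_infer_request, and the constants
# CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION, LAYOUT_TYPE
# and BATCH_SIZE_ELEM (see utils/benchmark_utils.py).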

def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logging.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # limit threading for CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # pin threads for CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # for pure CPU execution, more throughput-oriented execution via streams
            if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device:
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
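            # NOTE: CPU_THROUGHPUT_STREAMS splits CPU inference into parallel "streams",
            # one per in-flight infer request, so async requests can actually run
            # concurrently instead of being serialized on a single stream.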
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions are loaded from {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)

        logger.info("Device is {}".format(plugin.device))
        logger.info("Plugin version is {}".format(plugin.version))

        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)
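        # NOTE: the weights file is assumed to sit next to the IR .xml with the same
        # base name and the BIN_EXTENSION suffix (typically ".bin")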

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')
        elif len(input_info) != 1:
            raise AttributeError("Only networks with a single input layer are supported")

        # -------------------------------------- 3. Change network batch_size --------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # take the shape of the single input layer and override its batch dimension
            shape = input_info[key].shape
            # We support models having only one input layer
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})
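            # reshape() propagates the new batch size through the network, so the inputs
            # info is re-read below to pick up the updated blob shapes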

            input_info = ie_network.inputs

            batch_size = args.batch_size


        logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
        logger_message += "{}, precision: {}".format(batch_size, precision)
        logger.info(logger_message)

        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)
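        # plugin.load() creates args.number_infer_requests infer request slots on the
        # executable network; the async branch below drives them in round-robin order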

        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warn("Network batch size {} is less than the number of images {},"
                        " some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}
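        # the same pre-filled input blob is reused for every inference, so image decoding
        # and preprocessing stay outside the measured execution time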

        times = list()
        duration = 0

        # when no iteration count is given, run for a device-dependent time budget
        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':

            # warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({} sync inference executions)".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)

            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            # median latency is robust to outliers; each inference processes batch_size images
            times.sort()
            latency = median(times)
            fps = batch_size / latency

            print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({} async inference executions, "
                            "{} inference requests in parallel)".format(args.number_iterations,
                                                                        args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

            # warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

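            # Pipelined execution: each loop iteration starts request `current_inference`
            # and then waits for request `previous_inference`, which was submitted
            # args.number_infer_requests - 1 iterations earlier. Both indices wrap around
            # the request pool, so up to number_infer_requests inferences are in flight
            # at any time; `step` counts every submitted inference.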
            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    (args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration):
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # wait for the remaining inference executions to complete
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

            total_duration = (datetime.now() - start_time).total_seconds()
            # each of the `step` submitted inferences processed batch_size images
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin

    except Exception as e:
        logging.exception(e)


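# Example invocation (a sketch only: the long option names below are assumed to mirror
# the attribute names parsed in utils/benchmark_utils.py and may differ from the real flags):
#
#   python benchmark.py --path_to_model model.xml --path_to_images ./images \
#       --target_device CPU --api_type async --number_infer_requests 2 --number_iterations 100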
if __name__ == "__main__":
    main()