"""
Copyright (C) 2018-2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from datetime import datetime
from statistics import median

from openvino.inference_engine import IENetwork, IECore, get_version

from .utils.constants import CPU_DEVICE_NAME, MULTI_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME
from .utils.logging import logger
from .utils.utils import get_duration_seconds, parse_value_per_device, parse_devices


class Benchmark:
    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
        self.device = device.upper()
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info('GPU extension is loaded: {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
                logger.info('CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major,
                                                               version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            shape = input_layer.shape
            layout = input_layer.layout
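
            # Worked example (added for clarity, not from the original source):
            # for an 'NCHW' input layout, layout.index('N') below returns 0, so
            # the batch size lives in shape[0]; a channel-only 'C' layout has
            # no 'N' axis and falls back to the except branch.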
            try:
                batch_index = layout.index('N')
            except ValueError:
                batch_index = 1 if layout == 'C' else -1

            if batch_index != -1 and shape[batch_index] != batch_size:
                shape[batch_index] = batch_size
                new_shapes[input_layer_name] = shape

        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self, number_streams: int, api_type: str = 'async',
                   number_threads: int = None, infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_value_per_device(devices, number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for the CPU portion of inference
                if number_threads:
                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for the CPU portion of inference
                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')

            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)

            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                                    'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)
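
    # Illustrative note (not in the original source): for device='MULTI:CPU,GPU'
    # running async, the loop above requests CPU_THROUGHPUT_AUTO and
    # GPU_THROUGHPUT_AUTO streams (unless explicit counts were passed),
    # disables CPU thread binding, and sets CLDNN_PLUGIN_THROTTLE to '1'
    # so the GPU driver's polling thread does not starve CPU inference.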

    def load_network(self, ie_network: IENetwork, perf_counts: bool, number_infer_requests: int = None):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}

        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=number_infer_requests or 0)

        return exe_network

    def infer(self, request_queue, requests_input_data, batch_size, progress_bar):
        progress_count = 0
        # warming up - out of scope
        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')

        if self.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.req_id])
        else:
            infer_request.start_async(requests_input_data[infer_request.req_id])

        request_queue.wait_all()
        request_queue.reset_times()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        # Start inference & calculate performance.
        # The third condition aligns the number of iterations so that the last
        # infer requests are executed under the same conditions.
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            infer_request = request_queue.get_idle_request()
            if not infer_request:
                raise Exception('No idle Infer Requests!')

            if self.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.req_id])
            else:
                infer_request.start_async(requests_input_data[infer_request.req_id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if self.duration_seconds:
                # Calculate how many progress intervals the run has covered so far;
                # this depends on the elapsed time and the length of each progress
                # interval. Intervals already reported must be skipped.
                progress_interval_time = self.duration_seconds / progress_bar.total_num
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress
            elif self.niter:
                progress_bar.add_progress(1)

        # wait for the latest inference executions to finish
        request_queue.wait_all()

        total_duration_sec = request_queue.get_duration_in_seconds()
        times = request_queue.times
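        # Metric notes (added for clarity, not in the original source): latency
        # is the median per-request time in milliseconds, so sync FPS is
        # batch_size * 1000 / latency_ms; for async runs, throughput is instead
        # measured as total frames (batch_size * iterations) over wall time.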
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms
        if self.api_type == 'async':
            fps = batch_size * iteration / total_duration_sec
        progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
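
# Usage sketch (illustrative only; request_queue and progress_bar are assumed
# to come from the tool's utils helpers, and the exact names may differ):
#
#     benchmark = Benchmark('CPU', number_infer_requests=4,
#                           number_iterations=None, duration_seconds=60,
#                           api_type='async')
#     benchmark.set_config(number_streams=4)
#     exe_network = benchmark.load_network(ie_network, perf_counts=False,
#                                          number_infer_requests=4)
#     fps, latency_ms, duration, iters = benchmark.infer(
#         request_queue, requests_input_data, batch_size, progress_bar)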