1 """
2  Copyright (C) 2018-2019 Intel Corporation
3
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 """

from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin

from .utils.benchmark_utils import *
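# NOTE (assumption): the wildcard import above is expected to supply everything this module
# uses but does not import explicitly: the helpers (parse_args, validate_args, get_images,
# fill_blob_with_image, sync_infer_request, get_duration_in_secs), the constants
# (CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION, LAYOUT_TYPE,
# BATCH_SIZE_ELEM), the module-level `logger`, and the `os`, `logging` and `datetime` modules.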

def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logger.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # limit threading for CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # pin threads for CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # for pure CPU execution, more throughput-oriented execution via streams
            if args.api_type == 'async':
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions config file is loaded: {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

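        # The options collected above (CPU_THREADS_NUM, CPU_BIND_THREAD, CPU_THROUGHPUT_STREAMS,
        # CONFIG_FILE, LOG_LEVEL, VPU_LOG_LEVEL) are Inference Engine plugin configuration keys;
        # they are accumulated per target device and applied in a single set_config() call.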
        plugin.set_config(config)

        logger.info("Device is {}".format(plugin.device))
        logger.info("Plugin version is {}".format(plugin.version))

        # --------------------- 2. Read IR Generated by Model Optimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

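        # The weights file (.bin) is assumed to sit next to the topology file (.xml)
        # with the same base name; its path is derived by swapping the extension.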
        xml_filename = os.path.abspath(args.path_to_model)
        head, _ = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')
        elif len(input_info) != 1:
            raise AttributeError('Only networks with a single input layer are supported')

        # -------------------------------------- 3. Change network batch_size -------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

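        # NOTE (assumption): IENetwork.reshape() below is expected to propagate the new
        # input shape (with the updated batch dimension) through the whole network.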
        if args.batch_size and args.batch_size != ie_network.batch_size:
            # take the current input shape and overwrite its batch dimension
            shape = input_info[key].shape
            # only models with a single input layer are supported here
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})

            input_info = ie_network.inputs

            batch_size = args.batch_size

        logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
        logger_message += "{}, precision: {}".format(batch_size, precision)
        logger.info(logger_message)

        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
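        # NOTE (assumption): the second argument of IEPlugin.load() is the number of infer
        # requests to create; they become available later through exe_network.requests.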
        exe_network = plugin.load(ie_network, args.number_infer_requests)

        # ------------------------------------ 5. Performance measurements -----------------------------------------
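        # NOTE (assumption): get_images() and fill_blob_with_image() come from benchmark_utils;
        # get_images() is expected to collect the input image files and fill_blob_with_image()
        # to pack them into a single blob matching the input layer shape (batch_size images per blob).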
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warning("Network batch size {} is less than the image count {},"
                           " some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}

        times = list()
        duration = 0

        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':

            # warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({} sync inference executions)".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)

            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

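            # Latency is reported as the median of the per-request times collected by
            # sync_infer_request(); throughput counts batch_size images per inference.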
            times.sort()
            latency = median(times)
            fps = batch_size / latency

            print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({} async inference executions, "
                            "{} inference requests in parallel)".format(args.number_iterations,
                                                                        args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

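            # The infer requests are used as a ring: `current_inference` is the request being
            # started on this step and `previous_inference` the one being waited on.
            # `previous_inference` starts negative so that the first (number_infer_requests - 1)
            # steps only fill the pipeline without waiting.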
            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

            # warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

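            # The loop below keeps running while the pipeline has not yet cycled through every
            # request, the planned number of steps has not been reached, or (in time-based mode)
            # the time budget has not elapsed.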
            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration:
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

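            # NOTE (assumption): in the loop below, wait(0) is expected to return the request
            # status without blocking; a non-zero status means the request has not completed
            # yet, so a blocking wait() is issued to let it finish.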
            # wait for the remaining inference requests to complete
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

            total_duration = (datetime.now() - start_time).total_seconds()
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

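        # drop the references so the executable network and the plugin (and the underlying
        # device resources) are released before the function returns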
        del exe_network
        del plugin

    except Exception as e:
        logging.exception(e)