inference-engine/ie_bridges/python/sample/benchmark_app/benchmark.py
#!/usr/bin/env python
"""
 Copyright (c) 2018 Intel Corporation

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
"""

from statistics import median
from openvino.inference_engine import IENetwork, IEPlugin

from utils.benchmark_utils import *
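# NOTE: the wildcard import above is expected to provide the helpers and names used
# unqualified below: logger/logging, os, datetime, parse_args, validate_args, get_images,
# get_duration_in_secs, fill_blob_with_image, sync_infer_request, and the constants
# CPU_DEVICE_NAME, GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, BIN_EXTENSION, LAYOUT_TYPE
# and BATCH_SIZE_ELEM (see utils/benchmark_utils.py).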

def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logging.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # limit threading for CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # pin threads for CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # for pure CPU execution, more throughput-oriented execution via streams
            if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device:
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
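            # NOTE: CPU_THROUGHPUT_STREAMS splits CPU inference into parallel "streams",
            # one per in-flight infer request, so async requests can actually run
            # concurrently instead of being serialized on a single stream.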
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions are loaded from {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)

        logger.info("Device is {}".format(plugin.device))
        logger.info("Plugin version is {}".format(plugin.version))

        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)
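        # NOTE: the weights file is assumed to sit next to the IR .xml with the same
        # base name and the BIN_EXTENSION suffix (typically ".bin")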

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')
        elif len(input_info) != 1:
            raise AttributeError("Only networks with a single input layer are supported")

        # -------------------------------------- 3. Change network batch_size --------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # take the shape of the single input layer and override its batch dimension
            shape = input_info[key].shape
            # We support models having only one input layer
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})
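            # reshape() propagates the new batch size through the network, so the inputs
            # info is re-read below to pick up the updated blob shapes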

            input_info = ie_network.inputs

            batch_size = args.batch_size


        logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
        logger_message += "{}, precision: {}".format(batch_size, precision)
        logger.info(logger_message)

        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)
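        # plugin.load() creates args.number_infer_requests infer request slots on the
        # executable network; the async branch below drives them in round-robin order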

        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warn("Network batch size {} is less than the number of images {},"
                        " some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}
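        # the same pre-filled input blob is reused for every inference, so image decoding
        # and preprocessing stay outside the measured execution time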

        times = list()
        duration = 0

        # when no iteration count is given, run for a device-dependent time budget
        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':

            # warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info(
                    "Start inference synchronously ({} sync inference executions)".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)

            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            # median latency is robust to outliers; each inference processes batch_size images
            times.sort()
            latency = median(times)
            fps = batch_size / latency

            print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({} async inference executions, "
                            "{} inference requests in parallel)".format(args.number_iterations,
                                                                        args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

            # warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

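            # Pipelined execution: each loop iteration starts request `current_inference`
            # and then waits for request `previous_inference`, which was submitted
            # args.number_infer_requests - 1 iterations earlier. Both indices wrap around
            # the request pool, so up to number_infer_requests inferences are in flight
            # at any time; `step` counts every submitted inference.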
            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    (args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration):
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # wait for the remaining inference executions to complete
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

            total_duration = (datetime.now() - start_time).total_seconds()
            # each of the `step` submitted inferences processed batch_size images
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin

    except Exception as e:
        logging.exception(e)


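# Example invocation (a sketch only: the long option names below are assumed to mirror
# the attribute names parsed in utils/benchmark_utils.py and may differ from the real flags):
#
#   python benchmark.py --path_to_model model.xml --path_to_images ./images \
#       --target_device CPU --api_type async --number_infer_requests 2 --number_iterations 100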
if __name__ == "__main__":
    main()