Publishing 2019 R1 content
[platform/upstream/dldt.git] / tools / calibration / aggregated_statistics.py
1 """
2 Copyright (C) 2018-2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 """
16
17 import json
18 import numpy
19 import openvino.inference_engine as ie
20 from .network_node_stats import NetworkNodeStats
21 from .shape import Shape
22
23
class AggregatedStatistics:
    """Per-layer activation statistics aggregated over inference iterations.

    For every collected layer the statistics are stored as a numpy array of
    shape ``(channels, samples, 2)`` where the last axis holds the observed
    minimum (``INDEX_MIN``) and maximum (``INDEX_MAX``) for that
    (channel, sample) pair.
    """

    # indices into the last axis of the per-layer statistics array
    INDEX_MIN = 0
    INDEX_MAX = 1

    def __init__(self, result=None, ignore_layer_names: set=None, iterations_count: int = 1, dataset_size: int = 1):
        """
        :param result: optional object with `network`, `exec_network` and
            `result` (iterable of per-request outputs) attributes; when given,
            its inference results are aggregated immediately
        :param ignore_layer_names: layer names excluded from collection
        :param iterations_count: expected number of add() calls, used to
            pre-size the statistics buffers
        :param dataset_size: calibration dataset size (not used by the
            aggregation itself; kept for callers)
        """
        self._ignore_layer_names = ignore_layer_names
        self._registered_layers = None
        self._iterations_count = iterations_count
        self._dataset_size = dataset_size
        self._iteration = 0  # renamed from the misspelled private '_itteration'

        if result:
            for inference_result in result.result:
                self.add(network=result.network, exec_network=result.exec_network, inference_result=inference_result)

    def release(self):
        """Drop all collected statistics and release the buffers."""
        if self._registered_layers:
            del self._registered_layers
            self._registered_layers = None

    def add(
        self,
        network: 'ie.IENetwork',
        exec_network: 'ie.ExecutableNetwork',
        inference_result
    ):
        '''
        Add one inference result to the aggregated statistics.

        Annotations are quoted so evaluating the signature does not require
        Inference Engine types at class-definition time.
        '''
        layer_names = network.layers.keys()

        if not self._registered_layers:
            self._registered_layers = dict()
            initialized = False
        else:
            initialized = True

        # TODO: can be refactored: we are iterating over all layers (to cover
        # input layers output) to collect statistics
        for out_layer_name in layer_names:
            if self._ignore_layer_names and out_layer_name in self._ignore_layer_names:
                continue

            if out_layer_name in network.inputs:
                # input layers are absent from the inference result: take the
                # blob from the first infer request instead
                output_blob = exec_network.requests[0].inputs[out_layer_name]
                shape = Shape.create(network.inputs[out_layer_name].layout, output_blob.shape)
            else:
                if out_layer_name not in inference_result:
                    continue
                output_blob = inference_result[out_layer_name]
                shape = Shape.create(network.outputs[out_layer_name].layout, output_blob.shape)

            if not initialized:
                # for const layers N is not equal to batch size, so the buffer
                # is sized from the actual blob N, not from the dataset size
                self._registered_layers[out_layer_name] = numpy.empty((shape.c, shape.n * self._iterations_count, 2))

            if shape.layout[0] != 'C' and not (len(shape.layout) >= 2 and shape.layout[0] == 'N' and shape.layout[1] == 'C'):
                raise ValueError("unsupported layout '{}'".format(shape.layout))

            if shape.layout[0] != 'N':
                # no batch dimension: wrap the blob so data[sample] still works
                output_blob = [output_blob]

            for sample in range(0, shape.n):
                for channel in range(0, shape.c):
                    self.add_tensor_statistics(out_layer_name, output_blob, shape.n, sample, channel, self._iteration)

        self._iteration += 1

    def register_layer(self, layer_name: str):
        """Reserve a statistics slot for *layer_name*.

        :raises ValueError: if the layer has already been registered
        """
        if self._registered_layers is None:
            # allow registration before the first add() call; the original
            # code raised TypeError ('in' on None) here
            self._registered_layers = dict()

        if layer_name in self._registered_layers:
            raise ValueError("layer '{}' has been added already".format(layer_name))

        self._registered_layers[layer_name] = None

    @property
    def registered_layers(self):
        # mapping layer name -> numpy statistics array (or None until filled)
        return self._registered_layers

    def add_tensor_statistics(self, layer_name: str, data, n: int, sample: int, channel: int, itteration: int):
        """Record min/max of one (sample, channel) slice of a layer blob.

        :param data: blob indexable as data[sample][channel]
        :param n: batch size of the blob
        :param itteration: zero-based add() call number (parameter keeps the
            historical misspelling for backward compatibility)
        """
        channels = self._registered_layers[layer_name]

        n_index = sample + n * itteration
        if n_index >= channels.shape[1]:
            # grow the sample axis when more iterations arrive than pre-sized for
            channels.resize((channels.shape[0], channels.shape[1] + 1, channels.shape[2]), refcheck=False)

        # direct assignment instead of ndarray.itemset (removed in NumPy 2.0)
        values = data[sample][channel]
        channels[channel, n_index, self.INDEX_MIN] = values.min()
        channels[channel, n_index, self.INDEX_MAX] = values.max()

    def get_number_channels(self, layer_name: str):
        """Return the channel count collected for *layer_name* (0 if unknown)."""
        if layer_name in self._registered_layers:
            return len(self._registered_layers[layer_name])
        return 0

    def get_data_min_max(self, layer_name: str, channel: int, threshold: float = None):
        """Return (min, max) for one channel, optionally clipping outliers.

        :param threshold: percent (0-100] of samples to keep; the most extreme
            remaining values are discarded symmetrically. None keeps everything.
        :return: (0.0, 0.0) for an unknown layer
        """
        if layer_name not in self._registered_layers:
            return 0.0, 0.0

        # each row of stats is a (min, max) pair for one sample
        stats = self._registered_layers[layer_name][channel]
        min_values = sorted(row.item(self.INDEX_MIN) for row in stats)
        max_values = sorted(row.item(self.INDEX_MAX) for row in stats)

        if threshold:
            # keep at least one element so small sample counts cannot produce
            # an out-of-range index (original code could raise IndexError)
            elements_to_take = max(int(len(max_values) * threshold / 100), 1)
        else:
            elements_to_take = len(max_values)
        elements_to_throw = len(max_values) - elements_to_take

        return min_values[elements_to_throw], max_values[elements_to_take - 1]

    def serialize(self, json_file_path: str):
        """Write the collected statistics to *json_file_path* as JSON.

        numpy arrays are converted to nested lists, since json.dump cannot
        serialize ndarrays natively (the original call raised TypeError).
        """
        with open(json_file_path, 'w') as out_file:
            json.dump(self._registered_layers, out_file, default=lambda value: value.tolist())

    def get_node_statistics(self, threshold=None):
        """Build a {layer_name: NetworkNodeStats} mapping from the aggregate.

        :param threshold: forwarded to get_data_min_max for outlier clipping
        """
        net_nodes_stats = dict()
        # go over all collected layers and fill per-channel min/max
        for layer_name in self.registered_layers:
            channels_count = self.get_number_channels(layer_name)
            node_stats = net_nodes_stats.setdefault(layer_name, NetworkNodeStats(channels_count))

            for channel in range(channels_count):
                node_stats.min_outputs[channel], node_stats.max_outputs[channel] = self.get_data_min_max(layer_name, channel, threshold)

        return net_nodes_stats

    def pop(self, ignore_layer_names: set):
        """Remove statistics for the given layers; unknown names are ignored
        (they may have been skipped during collection)."""
        for ignore_layer_name in ignore_layer_names:
            self._registered_layers.pop(ignore_layer_name, None)