"""
Copyright (c) 2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np

from ..adapters import Adapter
from ..config import ConfigValidator, StringField, NumberField
from ..representation import DetectionPrediction, ContainerPrediction
class ActionDetectorConfig(ConfigValidator):
    """Configuration schema checked by the ``action_detection`` adapter."""

    # Name of the model output the adapter reads prior boxes ([0][0]) and
    # their variances ([0][1]) from.
    priorbox_out = StringField()
    # Name of the model output holding box location deltas (reshaped to N x 4).
    loc_out = StringField()
    # Name of the model output holding the main detection confidences; the
    # adapter compares column 1 of it against ``detection_threshold``.
    main_conf_out = StringField()
    # Name (or name prefix) of the output(s) holding per-action confidences.
    # Used verbatim when ``add_conf_out_count`` is absent, otherwise suffixed
    # with 1..add_conf_out_count.
    add_conf_out_prefix = StringField()
    # Optional number of action-confidence outputs sharing the prefix (>= 1).
    add_conf_out_count = NumberField(optional=True, min_value=1)
    # Number of action classes; used as the row width when reshaping the
    # action-confidence outputs.
    num_action_classes = NumberField()
    # Optional score threshold in [0, 1]; the adapter falls back to 0 when unset.
    detection_threshold = NumberField(optional=True, floats=True, min_value=0, max_value=1)
class ActionDetection(Adapter):
    """Adapter turning raw action-detector outputs into detection predictions.

    For every image in a batch it decodes prior-box based location deltas into
    boxes, picks the best-scoring action class per box and emits a
    ``ContainerPrediction`` with two ``DetectionPrediction`` entries:
    ``'action_prediction'`` (action label and action score) and
    ``'class_agnostic_prediction'`` (label ``1`` and the main confidence).
    """

    __provider__ = 'action_detection'

    def validate_config(self):
        """Validate the launcher config and cache the values used by ``process``.

        Sets ``priorbox_out``, ``loc_out``, ``main_conf_out``,
        ``num_action_classes``, ``detection_threshold`` (0 when not configured)
        and ``add_conf_outs`` (the list of action-confidence output names).
        """
        action_detector_adapter_config = ActionDetectorConfig('ActionDetector_Config')
        action_detector_adapter_config.validate(self.launcher_config)

        self.priorbox_out = self.launcher_config['priorbox_out']
        self.loc_out = self.launcher_config['loc_out']
        self.main_conf_out = self.launcher_config['main_conf_out']
        self.num_action_classes = self.launcher_config['num_action_classes']
        self.detection_threshold = self.launcher_config.get('detection_threshold', 0)

        add_conf_out_count = self.launcher_config.get('add_conf_out_count')
        add_conf_out_prefix = self.launcher_config['add_conf_out_prefix']
        if add_conf_out_count is None:
            # A single action-confidence output: the prefix is the full name.
            self.add_conf_outs = [add_conf_out_prefix]
        else:
            # Several outputs named <prefix>1 .. <prefix>N. int() keeps the
            # suffix an integer even if the config value was parsed as a float.
            self.add_conf_outs = [
                '{}{}'.format(add_conf_out_prefix, num)
                for num in range(1, int(add_conf_out_count) + 1)
            ]

    def process(self, raw, identifiers=None, frame_meta=None):
        """Convert raw network outputs into one ``ContainerPrediction`` per identifier.

        Args:
            raw: raw launcher output, passed to ``self._extract_predictions``.
            identifiers: iterable of input identifiers; position in the
                iterable is used as the batch index.
            frame_meta: metadata forwarded to ``self._extract_predictions``.

        Returns:
            list of ``ContainerPrediction`` objects, one per identifier.
        """
        result = []
        raw_outputs = self._extract_predictions(raw, frame_meta)
        # Priors are taken from the first batch element only and reused for
        # every image: [0][0] holds the boxes, [0][1] the variances.
        prior_boxes = raw_outputs[self.priorbox_out][0][0].reshape(-1, 4)
        prior_variances = raw_outputs[self.priorbox_out][0][1].reshape(-1, 4)

        for batch_id, identifier in enumerate(identifiers):
            labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = self.prepare_detection_for_id(
                batch_id, raw_outputs, prior_boxes, prior_variances
            )
            action_prediction = DetectionPrediction(identifier, labels, class_scores, x_mins, y_mins, x_maxs, y_maxs)
            # Same boxes, but every detection gets label 1 and the main score.
            person_prediction = DetectionPrediction(
                identifier, [1] * len(labels), main_scores, x_mins, y_mins, x_maxs, y_maxs
            )
            result.append(ContainerPrediction({
                'action_prediction': action_prediction, 'class_agnostic_prediction': person_prediction
            }))

        return result

    def prepare_detection_for_id(self, batch_id, raw_outputs, prior_boxes, prior_variances):
        """Decode the detections of a single batch element.

        Args:
            batch_id: index of the image inside the batch.
            raw_outputs: mapping from output name to array, indexable by batch.
            prior_boxes: array of prior boxes, one row of 4 values per detection.
            prior_variances: array of prior variances, same layout as ``prior_boxes``.

        Returns:
            tuple ``(labels, class_scores, x_mins, y_mins, x_maxs, y_maxs,
            main_scores)`` of equally long lists, containing only detections
            whose main confidence is not below ``self.detection_threshold``.
        """
        num_detections = raw_outputs[self.loc_out][batch_id].size // 4
        locs = raw_outputs[self.loc_out][batch_id].reshape(-1, 4)
        main_conf = raw_outputs[self.main_conf_out][batch_id].reshape(num_detections, -1)
        add_confs = [
            raw_outputs[layer][batch_id].reshape(-1, self.num_action_classes)
            for layer in self.add_conf_outs
        ]
        anchors_num = len(add_confs)

        labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = [], [], [], [], [], [], []
        for index in range(num_detections):
            if main_conf[index, 1] < self.detection_threshold:
                continue

            x_min, y_min, x_max, y_max = self.decode_box(prior_boxes[index], prior_variances[index], locs[index])
            # Detections are interleaved over the action-confidence outputs:
            # the output is chosen by index % count, the row by index // count.
            action_confs = add_confs[index % anchors_num][index // anchors_num]
            action_label = np.argmax(action_confs)

            labels.append(action_label)
            class_scores.append(action_confs[action_label])
            x_mins.append(x_min)
            y_mins.append(y_min)
            x_maxs.append(x_max)
            y_maxs.append(y_max)
            main_scores.append(main_conf[index, 1])

        return labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores

    @staticmethod
    def decode_box(prior, var, deltas):
        """Decode one box from its prior, variances and predicted deltas.

        Args:
            prior: prior box as ``(x_min, y_min, x_max, y_max)``.
            var: four variances scaling the respective deltas.
            deltas: predicted ``(center_x, center_y, width, height)`` offsets;
                the size offsets are applied through ``exp``.

        Returns:
            tuple ``(x_min, y_min, x_max, y_max)`` of the decoded box.
        """
        prior_width = prior[2] - prior[0]
        prior_height = prior[3] - prior[1]
        prior_center_x = (prior[0] + prior[2]) / 2
        prior_center_y = (prior[1] + prior[3]) / 2

        # Centre shifts are relative to the prior size; sizes scale exponentially.
        decoded_box_center_x = var[0] * deltas[0] * prior_width + prior_center_x
        decoded_box_center_y = var[1] * deltas[1] * prior_height + prior_center_y
        decoded_box_width = np.exp(var[2] * deltas[2]) * prior_width
        decoded_box_height = np.exp(var[3] * deltas[3]) * prior_height

        decoded_xmin = decoded_box_center_x - decoded_box_width / 2
        decoded_ymin = decoded_box_center_y - decoded_box_height / 2
        decoded_xmax = decoded_box_center_x + decoded_box_width / 2
        decoded_ymax = decoded_box_center_y + decoded_box_height / 2

        return decoded_xmin, decoded_ymin, decoded_xmax, decoded_ymax