tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py

   1 """
   2 Copyright (c) 2019 Intel Corporation
   3
   4 Licensed under the Apache License, Version 2.0 (the "License");
   5 you may not use this file except in compliance with the License.
   6 You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10 Unless required by applicable law or agreed to in writing, software
  11 distributed under the License is distributed on an "AS IS" BASIS,
  12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 See the License for the specific language governing permissions and
  14 limitations under the License.
  15 """
  16
  17 import numpy as np
  18
  19 from ..adapters import Adapter
  20 from ..config import ConfigValidator, StringField, NumberField
  21 from ..representation import DetectionPrediction, ContainerPrediction
  22
  23
class ActionDetectorConfig(ConfigValidator):
    """
    Declares the configuration keys accepted by the 'action_detection' adapter.

    An instance of this validator is run against the launcher config in
    ActionDetection.validate_config; the keys below are then read in
    ActionDetection.configure.
    """
    # Adapter type string (not read by the adapter code in this module;
    # presumably used to select the adapter - confirm against the adapter registry).
    type = StringField()
    # Name of the model output whose first two rows are reshaped to (-1, 4) and used as
    # prior boxes and prior variances respectively.
    priorbox_out = StringField()
    # Name of the model output with box regression deltas (reshaped to (-1, 4) per batch item).
    loc_out = StringField()
    # Name of the model output with the main confidences; column 1 is used as the detection score.
    main_conf_out = StringField()
    # Name (when add_conf_out_count is absent) or name prefix (otherwise) of the
    # additional confidence outputs that carry per-action scores.
    add_conf_out_prefix = StringField()
    # Optional number of additional confidence outputs; outputs are addressed as
    # '<prefix>1' ... '<prefix>N'.
    add_conf_out_count = NumberField(optional=True, min_value=1)
    # Number of action classes; used as the last dimension when reshaping additional confidences.
    num_action_classes = NumberField()
    # Optional score threshold in [0, 1]; detections whose main score is below it are dropped.
    detection_threshold = NumberField(optional=True, floats=True, min_value=0, max_value=1)
  33
  34
  35 class ActionDetection(Adapter):
  36     __provider__ = 'action_detection'
  37
  38     def validate_config(self):
  39         action_detector_adapter_config = ActionDetectorConfig('ActionDetector_Config')
  40         action_detector_adapter_config.validate(self.launcher_config)
  41
  42     def configure(self):
  43         self.priorbox_out = self.launcher_config['priorbox_out']
  44         self.loc_out = self.launcher_config['loc_out']
  45         self.main_conf_out = self.launcher_config['main_conf_out']
  46         self.num_action_classes = self.launcher_config['num_action_classes']
  47         self.detection_threshold = self.launcher_config.get('detection_threshold', 0)
  48         add_conf_out_count = self.launcher_config.get('add_conf_out_count')
  49         add_conf_out_prefix = self.launcher_config['add_conf_out_prefix']
  50         if add_conf_out_count is None:
  51             self.add_conf_outs = [add_conf_out_prefix]
  52         else:
  53             self.add_conf_outs = []
  54             for num in np.arange(start=1, stop=add_conf_out_count + 1):
  55                 self.add_conf_outs.append('{}{}'.format(add_conf_out_prefix, num))
  56
  57     def process(self, raw, identifiers=None, frame_meta=None):
  58         result = []
  59         raw_outputs = self._extract_predictions(raw, frame_meta)
  60         prior_boxes = raw_outputs[self.priorbox_out][0][0].reshape(-1, 4)
  61         prior_variances = raw_outputs[self.priorbox_out][0][1].reshape(-1, 4)
  62         for batch_id, identifier in enumerate(identifiers):
  63             labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = self.prepare_detection_for_id(
  64                 batch_id, raw_outputs, prior_boxes, prior_variances
  65             )
  66             action_prediction = DetectionPrediction(identifier, labels, class_scores, x_mins, y_mins, x_maxs, y_maxs)
  67             person_prediction = DetectionPrediction(
  68                 identifier, [1] * len(labels), main_scores, x_mins, y_mins, x_maxs, y_maxs
  69             )
  70             result.append(ContainerPrediction({
  71                 'action_prediction': action_prediction, 'class_agnostic_prediction': person_prediction
  72             }))
  73
  74         return result
  75
  76     def prepare_detection_for_id(self, batch_id, raw_outputs, prior_boxes, prior_variances):
  77         num_detections = raw_outputs[self.loc_out][batch_id].size // 4
  78         locs = raw_outputs[self.loc_out][batch_id].reshape(-1, 4)
  79         main_conf = raw_outputs[self.main_conf_out][batch_id].reshape(num_detections, -1)
  80         add_confs = list(map(
  81             lambda layer: raw_outputs[layer][batch_id].reshape(-1, self.num_action_classes), self.add_conf_outs
  82         ))
  83         anchors_num = len(add_confs)
  84         labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = [], [], [], [], [], [], []
  85         for index in range(num_detections):
  86             if main_conf[index, 1] < self.detection_threshold:
  87                 continue
  88
  89             x_min, y_min, x_max, y_max = self.decode_box(prior_boxes[index], prior_variances[index], locs[index])
  90             action_confs = add_confs[index % anchors_num][index // anchors_num]
  91             action_label = np.argmax(action_confs)
  92             labels.append(action_label)
  93             class_scores.append(action_confs[action_label])
  94             x_mins.append(x_min)
  95             y_mins.append(y_min)
  96             x_maxs.append(x_max)
  97             y_maxs.append(y_max)
  98             main_scores.append(main_conf[index, 1])
  99
 100         return labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores
 101
 102     @staticmethod
 103     def decode_box(prior, var, deltas):
 104         prior_width = prior[2] - prior[0]
 105         prior_height = prior[3] - prior[1]
 106         prior_center_x = (prior[0] + prior[2]) / 2
 107         prior_center_y = (prior[1] + prior[3]) / 2
 108
 109         decoded_box_center_x = var[0] * deltas[0] * prior_width + prior_center_x
 110         decoded_box_center_y = var[1] * deltas[1] * prior_height + prior_center_y
 111         decoded_box_width = np.exp(var[2] * deltas[2]) * prior_width
 112         decoded_box_height = np.exp(var[3] * deltas[3]) * prior_height
 113
 114         decoded_xmin = decoded_box_center_x - decoded_box_width / 2
 115         decoded_ymin = decoded_box_center_y - decoded_box_height / 2
 116         decoded_xmax = decoded_box_center_x + decoded_box_width / 2
 117         decoded_ymax = decoded_box_center_y + decoded_box_height / 2
 118
 119         return decoded_xmin, decoded_ymin, decoded_xmax, decoded_ymax