model-optimizer/extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py

   1 """
   2  Copyright (c) 2018-2019 Intel Corporation
   3
   4  Licensed under the Apache License, Version 2.0 (the "License");
   5  you may not use this file except in compliance with the License.
   6  You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10  Unless required by applicable law or agreed to in writing, software
  11  distributed under the License is distributed on an "AS IS" BASIS,
  12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  See the License for the specific language governing permissions and
  14  limitations under the License.
  15 """
  16
  17 import numpy as np
  18
  19 from extensions.ops.DetectionOutput import DetectionOutput
  20 from extensions.ops.splitv import SplitV
  21 from mo.front.subgraph_matcher import SubgraphMatch
  22 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph
  23 from mo.graph.graph import Node, Graph
  24 from mo.ops.concat import Concat
  25 from mo.ops.const import Const
  26 from mo.ops.eltwise import Eltwise
  27 from mo.ops.power import Power
  28 from mo.ops.reshape import Reshape
  29
  30
  31 class RetinaNetFilteredDetectionsReplacement(FrontReplacementFromConfigFileSubGraph):
  32     """
  33     The class replaces the sub-graph that performs boxes post-processing and NMS with the DetectionOutput layer.
  34
  35     The post-processing in the RetinaNet topology is performed differently from the DetectionOutput layer implementation
  36     in the Inference Engine. The first one calculates (d_x1, d_y1, d_x2, d_y2) which are a factor of the prior box width
  37     and height. The DetectionOuput with "code_type" equal to "caffe.PriorBoxParameter.CORNER" just adds predicted deltas
  38     to the prior box coordinates. This replacer add nodes which calculate prior box widths and heights, apply variances
  39     to the predicated box coordinates and multiply them. With this approach the DetectionOutput layer with "code_type"
  40     equal to "caffe.PriorBoxParameter.CORNER" produces the same result as the post-processing in the original topology.
  41     """
  42     replacement_id = 'RetinaNetFilteredDetectionsReplacement'
  43
  44     @staticmethod
  45     def _create_sub(graph: Graph, input_1: Node, port_1: int, input_2: Node, port_2: int):
  46         negate = Power(graph, dict(scale=-1, name=input_2.name + '/negate_'))
  47         add = Eltwise(graph, dict(operation='sum', name=input_1.name + '/add_'))
  48         out_node = add.create_node([(input_1, port_1), negate.create_node([(input_2, port_2)])])
  49         return out_node
  50
  51     def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
  52         return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id}
  53
  54     def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
  55         new_nodes_to_remove = match.matched_nodes_names()
  56         new_nodes_to_remove.remove(match.single_input_node(0)[0].id)
  57         new_nodes_to_remove.remove(match.single_input_node(1)[0].id)
  58         new_nodes_to_remove.remove(match.single_input_node(2)[0].id)
  59         return new_nodes_to_remove
  60
  61     def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
  62         reshape_classes_op = Reshape(graph, {'dim': np.array([0, -1])})
  63         reshape_classes_node = reshape_classes_op.create_node([match.single_input_node(1)[0]],
  64                                                               dict(name='do_reshape_classes'))
  65
  66         priors_node = match.single_input_node(2)[0]
  67
  68         placeholder = [Node(graph, node_id) for node_id in graph.nodes() if Node(graph, node_id).op == 'Placeholder'][0]
  69         im_height = placeholder.shape[1]
  70         im_width = placeholder.shape[2]
  71
  72         # scale prior boxes to the [0, 1] interval
  73         priors_scale_const_node = Const(graph, {'value': np.array([1 / im_width,
  74                                                                    1 / im_height,
  75                                                                    1 / im_width,
  76                                                                    1 / im_height])}).create_node([])
  77         priors_scale_node = Eltwise(graph, {'name': 'scale_priors', 'operation': 'mul'}).create_node(
  78             [priors_node, priors_scale_const_node])
  79
  80         # calculate prior boxes widths and heights
  81         split_node = SplitV(graph, {'axis': 2, 'size_splits': [1, 1, 1, 1], 'out_ports_count': 4}).create_node([priors_scale_node])
  82         priors_width_node = __class__._create_sub(graph, split_node, 2, split_node, 0)
  83         priors_height_node = __class__._create_sub(graph, split_node, 3, split_node, 1)
  84
  85         # concat weights and heights into a single tensor and multiple with the box coordinates regression values
  86         concat_width_height_node = Concat(graph, {'name': 'concat_priors_width_height', 'axis': -1, 'in_ports_count': 4}).create_node(
  87             [priors_width_node, priors_height_node, priors_width_node, priors_height_node])
  88         applied_width_height_regressions_node = Eltwise(graph, {'name': 'final_regressions', 'operation': 'mul'}). \
  89             create_node([concat_width_height_node, match.single_input_node(0)[0]])
  90
  91         # reshape to 2D tensor as Inference Engine Detection Output layer expects
  92         reshape_regression_op = Reshape(graph, {'dim': np.array([0, -1])})
  93         reshape_regression_node = reshape_regression_op.create_node([applied_width_height_regressions_node],
  94                                                                     {'name': 'reshape_regression'})
  95
  96         detection_output_op = DetectionOutput(graph, match.custom_replacement_desc.custom_attributes)
  97         detection_output_op.attrs['old_infer'] = detection_output_op.attrs['infer']
  98         detection_output_op.attrs['infer'] = __class__.do_infer
  99         detection_output_node = detection_output_op.create_node(
 100             [reshape_regression_node, reshape_classes_node, priors_scale_node],
 101             dict(name=detection_output_op.attrs['type'], clip=1, normalized=1, variance_encoded_in_target=0))
 102
 103         return {'detection_output_node': detection_output_node}
 104
 105     @staticmethod
 106     def do_infer(node):
 107         # append variances to the tensor with boxes regressions
 108         prior_boxes = node.in_node(2).value
 109         assert prior_boxes is not None, "The prior boxes are not constants"
 110         if prior_boxes is not None:
 111             variances = np.tile(node.variance, [prior_boxes.shape[-2], 1])
 112             prior_boxes = prior_boxes.reshape([-1, 4])
 113             prior_boxes = np.concatenate((prior_boxes, variances), 0)
 114             #  adding another dimensions, as the prior-boxes are expected as 3d tensor
 115             prior_boxes = prior_boxes.reshape((1, 2, -1))
 116             node.in_node(2).shape = np.array(prior_boxes.shape, dtype=np.int64)
 117             node.in_node(2).value = prior_boxes
 118
 119         node.old_infer(node)