Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / detection_output_gpu.cpp
1 /*
2 // Copyright (c) 2016 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16
17 #include "detection_output_inst.h"
18 #include "primitive_gpu_base.h"
19 #include "error_handler.h"
20 #include "kernel_selector_helper.h"
21 #include "detection_output/detection_output_kernel_base.h"
22 #include "detection_output/detection_output_kernel_selector.h"
23
24 #ifdef FIX_OPENMP_RELEASE_ISSUE
25 #ifdef OPENMP_FOUND
26 #include <omp.h>
27 #endif
28 #endif
29
30 namespace cldnn { namespace gpu {
31
32 struct detection_output_gpu : typed_primitive_gpu_impl<detection_output>
33 {
34     using parent = typed_primitive_gpu_impl<detection_output>;
35     using parent::parent;
36
37 private:
38     static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, const detection_output_node& arg)
39     {
40         auto primitive = arg.get_primitive();
41         detectOutParams.keep_top_k = primitive->keep_top_k;
42         detectOutParams.num_classes = primitive->num_classes;
43         detectOutParams.top_k = primitive->top_k;
44         detectOutParams.background_label_id = primitive->background_label_id;
45         detectOutParams.code_type = (int32_t)primitive->code_type;
46         detectOutParams.share_location = primitive->share_location;
47         detectOutParams.variance_encoded_in_target = primitive->variance_encoded_in_target;
48         detectOutParams.nms_threshold = primitive->nms_threshold;
49         detectOutParams.eta = primitive->eta;
50         detectOutParams.confidence_threshold = primitive->confidence_threshold;
51         detectOutParams.prior_coordinates_offset = primitive->prior_coordinates_offset;
52         detectOutParams.prior_info_size = primitive->prior_info_size;
53         detectOutParams.prior_is_normalized = primitive->prior_is_normalized;
54         detectOutParams.input_width = primitive->input_width;
55         detectOutParams.input_heigh = primitive->input_height;
56         detectOutParams.conf_size_x = arg.confidence().get_output_layout().get_buffer_size().spatial[0];
57         detectOutParams.conf_size_y = arg.confidence().get_output_layout().get_buffer_size().spatial[1];
58         detectOutParams.conf_padding_x = arg.confidence().get_output_layout().data_padding.lower_size().spatial[0];
59         detectOutParams.conf_padding_y = arg.confidence().get_output_layout().data_padding.lower_size().spatial[1];
60     }
61
62
63 public:
64
65     static primitive_impl* create(const detection_output_node& arg)
66     {
67         if (!arg.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled())
68         {
69             return runDetectOutCpu(arg);
70         }
71
72         auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
73         auto detect_out_optional_params = get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
74
75         setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
76
77         auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance();
78         auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
79
80         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
81
82         auto detect_out = new detection_output_gpu(arg, best_kernels[0]);
83
84         return detect_out;
85     }
86 };
87
88 primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel)
89 {
90     return new detection_output_gpu(arg, kernel);
91 }
92
93 /************************ Detection Output keep_top_k part ************************/
94
95 struct detection_output_sort_gpu : typed_primitive_gpu_impl<detection_output_sort>
96 {
97     using parent = typed_primitive_gpu_impl<detection_output_sort>;
98     using parent::parent;
99
100 private:
101     static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, const detection_output_sort_node& arg)
102     {
103         if (arg.get_dependency(0).is_type<detection_output>())
104         {
105             auto primitive = arg.get_dependency(0).as<detection_output>().get_primitive();
106             detectOutParams.keep_top_k = primitive->keep_top_k;
107             detectOutParams.num_classes = primitive->num_classes;
108             detectOutParams.num_images = arg.get_dependency(0).as<detection_output>().location().get_output_layout().size.batch[0];
109             detectOutParams.top_k = primitive->top_k;
110             detectOutParams.share_location = primitive->share_location;
111             detectOutParams.background_label_id = primitive->background_label_id;
112         }
113         else
114         {
115             auto primitive = arg.get_primitive();
116             detectOutParams.keep_top_k = primitive->keep_top_k;
117             detectOutParams.num_classes = primitive->num_classes;
118             detectOutParams.num_images = primitive->num_images;
119             detectOutParams.top_k = primitive->top_k;
120             detectOutParams.share_location = primitive->share_location;
121             detectOutParams.background_label_id = primitive->background_label_id;
122         }
123     }
124
125 public:
126     static primitive_impl* create(const detection_output_sort_node& arg)
127     {
128         auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
129         auto detect_out_optional_params = get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
130
131         setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
132
133         auto& kernel_selector = kernel_selector::detection_output_sort_kernel_selector::Instance();
134         auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
135
136         CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
137
138         auto detect_out = new detection_output_sort_gpu(arg, best_kernels[0]);
139
140         return detect_out;
141     }
142 };
143
144 primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel)
145 {
146     return new detection_output_sort_gpu(arg, kernel);
147 }
148
149 namespace {
150     struct attach {
151         attach() {
152             implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_gpu::create);
153             implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_gpu::create);
154             implementation_map<detection_output_sort>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_sort_gpu::create);
155             implementation_map<detection_output_sort>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_sort_gpu::create);
156         }
157         ~attach() {}
158     };
159     attach attach_impl;
160 }
161
162 }}